diff options
Diffstat (limited to 'drivers/infiniband/core/umem_odp.c')
-rw-r--r-- | drivers/infiniband/core/umem_odp.c | 645 |
1 files changed, 197 insertions, 448 deletions
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index c0e15db34680..b8c657b28380 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -39,417 +39,211 @@ #include <linux/export.h> #include <linux/vmalloc.h> #include <linux/hugetlb.h> -#include <linux/interval_tree_generic.h> +#include <linux/interval_tree.h> #include <linux/pagemap.h> #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> -/* - * The ib_umem list keeps track of memory regions for which the HW - * device request to receive notification when the related memory - * mapping is changed. - * - * ib_umem_lock protects the list. - */ +#include "uverbs.h" -static u64 node_start(struct umem_odp_node *n) +static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, + const struct mmu_interval_notifier_ops *ops) { - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_start(umem_odp); -} - -/* Note that the representation of the intervals in the interval tree - * considers the ending point as contained in the interval, while the - * function ib_umem_end returns the first address which is not contained - * in the umem. - */ -static u64 node_last(struct umem_odp_node *n) -{ - struct ib_umem_odp *umem_odp = - container_of(n, struct ib_umem_odp, interval_tree); - - return ib_umem_end(umem_odp) - 1; -} - -INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, - node_start, node_last, static, rbt_ib_umem) - -static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) -{ - mutex_lock(&umem_odp->umem_mutex); - if (umem_odp->notifiers_count++ == 0) - /* - * Initialize the completion object for waiting on - * notifiers. Since notifier_count is zero, no one should be - * waiting right now. - */ - reinit_completion(&umem_odp->notifier_completion); - mutex_unlock(&umem_odp->umem_mutex); -} - -static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp) -{ - mutex_lock(&umem_odp->umem_mutex); - /* - * This sequence increase will notify the QP page fault that the page - * that is going to be mapped in the spte could have been freed. - */ - ++umem_odp->notifiers_seq; - if (--umem_odp->notifiers_count == 0) - complete_all(&umem_odp->notifier_completion); - mutex_unlock(&umem_odp->umem_mutex); -} - -static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, - u64 start, u64 end, void *cookie) -{ - /* - * Increase the number of notifiers running, to - * prevent any further fault handling on this MR. - */ - ib_umem_notifier_start_account(umem_odp); - complete_all(&umem_odp->notifier_completion); - umem_odp->umem.context->invalidate_range( - umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); - return 0; -} + int ret; -static void ib_umem_notifier_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - - down_read(&per_mm->umem_rwsem); - if (per_mm->active) - rbt_ib_umem_for_each_in_range( - &per_mm->umem_tree, 0, ULLONG_MAX, - ib_umem_notifier_release_trampoline, true, NULL); - up_read(&per_mm->umem_rwsem); -} + umem_odp->umem.is_odp = 1; + mutex_init(&umem_odp->umem_mutex); -static int invalidate_range_start_trampoline(struct ib_umem_odp *item, - u64 start, u64 end, void *cookie) -{ - ib_umem_notifier_start_account(item); - item->umem.context->invalidate_range(item, start, end); - return 0; -} + if (!umem_odp->is_implicit_odp) { + size_t page_size = 1UL << umem_odp->page_shift; + unsigned long start; + unsigned long end; + size_t pages; + + start = ALIGN_DOWN(umem_odp->umem.address, page_size); + if (check_add_overflow(umem_odp->umem.address, + (unsigned long)umem_odp->umem.length, + &end)) + return -EOVERFLOW; + end = ALIGN(end, page_size); + if (unlikely(end < page_size)) + return -EOVERFLOW; + + pages = (end - start) >> umem_odp->page_shift; + if (!pages) + return -EINVAL; -static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - int rc; + umem_odp->page_list = kvcalloc( + pages, sizeof(*umem_odp->page_list), GFP_KERNEL); + if (!umem_odp->page_list) + return -ENOMEM; - if (mmu_notifier_range_blockable(range)) - down_read(&per_mm->umem_rwsem); - else if (!down_read_trylock(&per_mm->umem_rwsem)) - return -EAGAIN; + umem_odp->dma_list = kvcalloc( + pages, sizeof(*umem_odp->dma_list), GFP_KERNEL); + if (!umem_odp->dma_list) { + ret = -ENOMEM; + goto out_page_list; + } - if (!per_mm->active) { - up_read(&per_mm->umem_rwsem); - /* - * At this point active is permanently set and visible to this - * CPU without a lock, that fact is relied on to skip the unlock - * in range_end. - */ - return 0; + ret = mmu_interval_notifier_insert(&umem_odp->notifier, + umem_odp->umem.owning_mm, + start, end - start, ops); + if (ret) + goto out_dma_list; } - rc = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_start_trampoline, - mmu_notifier_range_blockable(range), - NULL); - if (rc) - up_read(&per_mm->umem_rwsem); - return rc; -} - -static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start, - u64 end, void *cookie) -{ - ib_umem_notifier_end_account(item); return 0; -} - -static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct ib_ucontext_per_mm *per_mm = - container_of(mn, struct ib_ucontext_per_mm, mn); - - if (unlikely(!per_mm->active)) - return; - - rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start, - range->end, - invalidate_range_end_trampoline, true, NULL); - up_read(&per_mm->umem_rwsem); -} -static const struct mmu_notifier_ops ib_umem_notifiers = { - .release = ib_umem_notifier_release, - .invalidate_range_start = ib_umem_notifier_invalidate_range_start, - .invalidate_range_end = ib_umem_notifier_invalidate_range_end, -}; - -static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - - down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) - rbt_ib_umem_insert(&umem_odp->interval_tree, - &per_mm->umem_tree); - up_write(&per_mm->umem_rwsem); -} - -static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - - down_write(&per_mm->umem_rwsem); - if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) - rbt_ib_umem_remove(&umem_odp->interval_tree, - &per_mm->umem_tree); - complete_all(&umem_odp->notifier_completion); - - up_write(&per_mm->umem_rwsem); +out_dma_list: + kvfree(umem_odp->dma_list); +out_page_list: + kvfree(umem_odp->page_list); + return ret; } -static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx, - struct mm_struct *mm) +/** + * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem + * + * Implicit ODP umems do not have a VA range and do not have any page lists. + * They exist only to hold the per_mm reference to help the driver create + * children umems. + * + * @device: IB device to create UMEM + * @access: ib_reg_mr access flags + */ +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device, + int access) { - struct ib_ucontext_per_mm *per_mm; + struct ib_umem *umem; + struct ib_umem_odp *umem_odp; int ret; - per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL); - if (!per_mm) - return ERR_PTR(-ENOMEM); - - per_mm->context = ctx; - per_mm->mm = mm; - per_mm->umem_tree = RB_ROOT_CACHED; - init_rwsem(&per_mm->umem_rwsem); - per_mm->active = true; + if (access & IB_ACCESS_HUGETLB) + return ERR_PTR(-EINVAL); - rcu_read_lock(); - per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); - rcu_read_unlock(); - - WARN_ON(mm != current->mm); + umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL); + if (!umem_odp) + return ERR_PTR(-ENOMEM); + umem = &umem_odp->umem; + umem->ibdev = device; + umem->writable = ib_access_writable(access); + umem->owning_mm = current->mm; + umem_odp->is_implicit_odp = 1; + umem_odp->page_shift = PAGE_SHIFT; - per_mm->mn.ops = &ib_umem_notifiers; - ret = mmu_notifier_register(&per_mm->mn, per_mm->mm); + umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + ret = ib_init_umem_odp(umem_odp, NULL); if (ret) { - dev_err(&ctx->device->dev, - "Failed to register mmu_notifier %d\n", ret); - goto out_pid; - } - - list_add(&per_mm->ucontext_list, &ctx->per_mm_list); - return per_mm; - -out_pid: - put_pid(per_mm->tgid); - kfree(per_mm); - return ERR_PTR(ret); -} - -static int get_per_mm(struct ib_umem_odp *umem_odp) -{ - struct ib_ucontext *ctx = umem_odp->umem.context; - struct ib_ucontext_per_mm *per_mm; - - /* - * Generally speaking we expect only one or two per_mm in this list, - * so no reason to optimize this search today. - */ - mutex_lock(&ctx->per_mm_list_lock); - list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) { - if (per_mm->mm == umem_odp->umem.owning_mm) - goto found; - } - - per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm); - if (IS_ERR(per_mm)) { - mutex_unlock(&ctx->per_mm_list_lock); - return PTR_ERR(per_mm); + put_pid(umem_odp->tgid); + kfree(umem_odp); + return ERR_PTR(ret); } - -found: - umem_odp->per_mm = per_mm; - per_mm->odp_mrs_count++; - mutex_unlock(&ctx->per_mm_list_lock); - - return 0; + return umem_odp; } +EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); -static void free_per_mm(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu)); -} - -static void put_per_mm(struct ib_umem_odp *umem_odp) +/** + * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit + * parent ODP umem + * + * @root: The parent umem enclosing the child. This must be allocated using + * ib_alloc_implicit_odp_umem() + * @addr: The starting userspace VA + * @size: The length of the userspace VA + */ +struct ib_umem_odp * +ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, + size_t size, + const struct mmu_interval_notifier_ops *ops) { - struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm; - struct ib_ucontext *ctx = umem_odp->umem.context; - bool need_free; - - mutex_lock(&ctx->per_mm_list_lock); - umem_odp->per_mm = NULL; - per_mm->odp_mrs_count--; - need_free = per_mm->odp_mrs_count == 0; - if (need_free) - list_del(&per_mm->ucontext_list); - mutex_unlock(&ctx->per_mm_list_lock); - - if (!need_free) - return; - /* - * NOTE! mmu_notifier_unregister() can happen between a start/end - * callback, resulting in an start/end, and thus an unbalanced - * lock. This doesn't really matter to us since we are about to kfree - * the memory that holds the lock, however LOCKDEP doesn't like this. + * Caller must ensure that root cannot be freed during the call to + * ib_alloc_odp_umem. */ - down_write(&per_mm->umem_rwsem); - per_mm->active = false; - up_write(&per_mm->umem_rwsem); - - WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root)); - mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm); - put_pid(per_mm->tgid); - mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm); -} - -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, - unsigned long addr, size_t size) -{ - struct ib_ucontext_per_mm *per_mm = root->per_mm; - struct ib_ucontext *ctx = per_mm->context; struct ib_umem_odp *odp_data; struct ib_umem *umem; - int pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON(!root->is_implicit_odp)) + return ERR_PTR(-EINVAL); + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); if (!odp_data) return ERR_PTR(-ENOMEM); umem = &odp_data->umem; - umem->context = ctx; + umem->ibdev = root->umem.ibdev; umem->length = size; umem->address = addr; - odp_data->page_shift = PAGE_SHIFT; umem->writable = root->umem.writable; - umem->is_odp = 1; - odp_data->per_mm = per_mm; - umem->owning_mm = per_mm->mm; - mmgrab(umem->owning_mm); - - mutex_init(&odp_data->umem_mutex); - init_completion(&odp_data->notifier_completion); - - odp_data->page_list = - vzalloc(array_size(pages, sizeof(*odp_data->page_list))); - if (!odp_data->page_list) { - ret = -ENOMEM; - goto out_odp_data; - } + umem->owning_mm = root->umem.owning_mm; + odp_data->page_shift = PAGE_SHIFT; + odp_data->notifier.ops = ops; - odp_data->dma_list = - vzalloc(array_size(pages, sizeof(*odp_data->dma_list))); - if (!odp_data->dma_list) { - ret = -ENOMEM; - goto out_page_list; + odp_data->tgid = get_pid(root->tgid); + ret = ib_init_umem_odp(odp_data, ops); + if (ret) { + put_pid(odp_data->tgid); + kfree(odp_data); + return ERR_PTR(ret); } - - /* - * Caller must ensure that the umem_odp that the per_mm came from - * cannot be freed during the call to ib_alloc_odp_umem. - */ - mutex_lock(&ctx->per_mm_list_lock); - per_mm->odp_mrs_count++; - mutex_unlock(&ctx->per_mm_list_lock); - add_umem_to_per_mm(odp_data); - return odp_data; - -out_page_list: - vfree(odp_data->page_list); -out_odp_data: - mmdrop(umem->owning_mm); - kfree(odp_data); - return ERR_PTR(ret); } -EXPORT_SYMBOL(ib_alloc_odp_umem); +EXPORT_SYMBOL(ib_umem_odp_alloc_child); -int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) +/** + * ib_umem_odp_get - Create a umem_odp for a userspace va + * + * @device: IB device struct to get UMEM + * @addr: userspace virtual address to start at + * @size: length of region to pin + * @access: IB_ACCESS_xxx flags for memory being pinned + * + * The driver should use when the access flags indicate ODP memory. It avoids + * pinning, instead, stores the mm for future page fault handling in + * conjunction with MMU notifiers. + */ +struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, + unsigned long addr, size_t size, int access, + const struct mmu_interval_notifier_ops *ops) { - struct ib_umem *umem = &umem_odp->umem; - /* - * NOTE: This must called in a process context where umem->owning_mm - * == current->mm - */ - struct mm_struct *mm = umem->owning_mm; - int ret_val; - - umem_odp->page_shift = PAGE_SHIFT; - if (access & IB_ACCESS_HUGETLB) { - struct vm_area_struct *vma; - struct hstate *h; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ib_umem_start(umem_odp)); - if (!vma || !is_vm_hugetlb_page(vma)) { - up_read(&mm->mmap_sem); - return -EINVAL; - } - h = hstate_vma(vma); - umem_odp->page_shift = huge_page_shift(h); - up_read(&mm->mmap_sem); - } - - mutex_init(&umem_odp->umem_mutex); - - init_completion(&umem_odp->notifier_completion); - - if (ib_umem_odp_num_pages(umem_odp)) { - umem_odp->page_list = - vzalloc(array_size(sizeof(*umem_odp->page_list), - ib_umem_odp_num_pages(umem_odp))); - if (!umem_odp->page_list) - return -ENOMEM; + struct ib_umem_odp *umem_odp; + struct mm_struct *mm; + int ret; - umem_odp->dma_list = - vzalloc(array_size(sizeof(*umem_odp->dma_list), - ib_umem_odp_num_pages(umem_odp))); - if (!umem_odp->dma_list) { - ret_val = -ENOMEM; - goto out_page_list; - } - } + if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND))) + return ERR_PTR(-EINVAL); - ret_val = get_per_mm(umem_odp); - if (ret_val) - goto out_dma_list; - add_umem_to_per_mm(umem_odp); + umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL); + if (!umem_odp) + return ERR_PTR(-ENOMEM); - return 0; + umem_odp->umem.ibdev = device; + umem_odp->umem.length = size; + umem_odp->umem.address = addr; + umem_odp->umem.writable = ib_access_writable(access); + umem_odp->umem.owning_mm = mm = current->mm; + umem_odp->notifier.ops = ops; -out_dma_list: - vfree(umem_odp->dma_list); -out_page_list: - vfree(umem_odp->page_list); - return ret_val; + umem_odp->page_shift = PAGE_SHIFT; +#ifdef CONFIG_HUGETLB_PAGE + if (access & IB_ACCESS_HUGETLB) + umem_odp->page_shift = HPAGE_SHIFT; +#endif + + umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + ret = ib_init_umem_odp(umem_odp, ops); + if (ret) + goto err_put_pid; + return umem_odp; + +err_put_pid: + put_pid(umem_odp->tgid); + kfree(umem_odp); + return ERR_PTR(ret); } +EXPORT_SYMBOL(ib_umem_odp_get); void ib_umem_odp_release(struct ib_umem_odp *umem_odp) { @@ -459,14 +253,19 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * It is the driver's responsibility to ensure, before calling us, * that the hardware will not attempt to access the MR any more. */ - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - - remove_umem_from_per_mm(umem_odp); - put_per_mm(umem_odp); - vfree(umem_odp->dma_list); - vfree(umem_odp->page_list); + if (!umem_odp->is_implicit_odp) { + mutex_lock(&umem_odp->umem_mutex); + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); + mutex_unlock(&umem_odp->umem_mutex); + mmu_interval_notifier_remove(&umem_odp->notifier); + kvfree(umem_odp->dma_list); + kvfree(umem_odp->page_list); + put_pid(umem_odp->tgid); + } + kfree(umem_odp); } +EXPORT_SYMBOL(ib_umem_odp_release); /* * Map for DMA and insert a single page into the on-demand paging page tables. @@ -482,29 +281,21 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * The function returns -EFAULT if the DMA mapping operation fails. It returns * -EAGAIN if a concurrent invalidation prevents us from updating the page. * - * The page is released via put_user_page even if the operation failed. For - * on-demand pinning, the page is released whenever it isn't stored in the - * umem. + * The page is released via put_page even if the operation failed. For on-demand + * pinning, the page is released whenever it isn't stored in the umem. */ static int ib_umem_odp_map_dma_single_page( struct ib_umem_odp *umem_odp, - int page_index, + unsigned int page_index, struct page *page, u64 access_mask, unsigned long current_seq) { - struct ib_ucontext *context = umem_odp->umem.context; - struct ib_device *dev = context->device; + struct ib_device *dev = umem_odp->umem.ibdev; dma_addr_t dma_addr; - int remove_existing_mapping = 0; int ret = 0; - /* - * Note: we avoid writing if seq is different from the initial seq, to - * handle case of a racing notifier. This check also allows us to bail - * early if we have a notifier running in parallel with us. - */ - if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) { + if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) { ret = -EAGAIN; goto out; } @@ -522,28 +313,29 @@ static int ib_umem_odp_map_dma_single_page( } else if (umem_odp->page_list[page_index] == page) { umem_odp->dma_list[page_index] |= access_mask; } else { - pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", - umem_odp->page_list[page_index], page); - /* Better remove the mapping now, to prevent any further - * damage. */ - remove_existing_mapping = 1; - } - -out: - put_user_page(page); - - if (remove_existing_mapping) { - ib_umem_notifier_start_account(umem_odp); - context->invalidate_range( - umem_odp, - ib_umem_start(umem_odp) + - (page_index << umem_odp->page_shift), - ib_umem_start(umem_odp) + - ((page_index + 1) << umem_odp->page_shift)); - ib_umem_notifier_end_account(umem_odp); + /* + * This is a race here where we could have done: + * + * CPU0 CPU1 + * get_user_pages() + * invalidate() + * page_fault() + * mutex_lock(umem_mutex) + * page from GUP != page in ODP + * + * It should be prevented by the retry test above as reading + * the seq number should be reliable under the + * umem_mutex. Thus something is really not working right if + * things get here. + */ + WARN(true, + "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", + umem_odp->page_list[page_index], page); ret = -EAGAIN; } +out: + put_page(page); return ret; } @@ -606,7 +398,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * existing beyond the lifetime of the originating process.. Presumably * mmget_not_zero will fail in this case. */ - owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID); + owning_process = get_pid_task(umem_odp->tgid, PIDTYPE_PID); if (!owning_process || !mmget_not_zero(owning_mm)) { ret = -EINVAL; goto out_put_task; @@ -620,7 +412,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); @@ -653,7 +445,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, ret = -EFAULT; break; } - put_user_page(local_page_list[j]); + put_page(local_page_list[j]); continue; } @@ -680,8 +472,8 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * ib_umem_odp_map_dma_single_page(). */ if (npages - (j + 1) > 0) - put_user_pages(&local_page_list[j+1], - npages - (j + 1)); + release_pages(&local_page_list[j+1], + npages - (j + 1)); break; } } @@ -707,7 +499,9 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, { int idx; u64 addr; - struct ib_device *dev = umem_odp->umem.context->device; + struct ib_device *dev = umem_odp->umem.ibdev; + + lockdep_assert_held(&umem_odp->umem_mutex); virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); @@ -716,7 +510,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { @@ -747,49 +540,5 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, umem_odp->npages--; } } - mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); - -/* @last is not a part of the interval. See comment for function - * node_last. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 last, - umem_call_back cb, - bool blockable, - void *cookie) -{ - int ret_val = 0; - struct umem_odp_node *node, *next; - struct ib_umem_odp *umem; - - if (unlikely(start == last)) - return ret_val; - - for (node = rbt_ib_umem_iter_first(root, start, last - 1); - node; node = next) { - /* TODO move the blockable decision up to the callback */ - if (!blockable) - return -EAGAIN; - next = rbt_ib_umem_iter_next(node, start, last - 1); - umem = container_of(node, struct ib_umem_odp, interval_tree); - ret_val = cb(umem, start, last, cookie) || ret_val; - } - - return ret_val; -} -EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); - -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, - u64 addr, u64 length) -{ - struct umem_odp_node *node; - - node = rbt_ib_umem_iter_first(root, addr, addr + length - 1); - if (node) - return container_of(node, struct ib_umem_odp, interval_tree); - return NULL; - -} -EXPORT_SYMBOL(rbt_ib_umem_lookup); |