Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_mr.c')
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_mr.c | 692 |
1 file changed, 636 insertions, 56 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index e387360e3780..da86a8117bd5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -47,6 +47,7 @@ unsigned long key_to_hw_index(u32 key)
 {
	return (key << 24) | (key >> 8);
 }
+EXPORT_SYMBOL_GPL(key_to_hw_index);

 static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
			      struct hns_roce_cmd_mailbox *mailbox,
@@ -65,6 +66,7 @@ int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
				 mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
+EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt);

 static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
				unsigned long *seg)
@@ -175,18 +177,28 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
 }

 static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
-				    unsigned long *seg)
+				    unsigned long *seg, u32 mtt_type)
 {
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-	int ret = 0;
+	struct hns_roce_hem_table *table;
+	struct hns_roce_buddy *buddy;
+	int ret;
+
+	if (mtt_type == MTT_TYPE_WQE) {
+		buddy = &mr_table->mtt_buddy;
+		table = &mr_table->mtt_table;
+	} else {
+		buddy = &mr_table->mtt_cqe_buddy;
+		table = &mr_table->mtt_cqe_table;
+	}

-	ret = hns_roce_buddy_alloc(&mr_table->mtt_buddy, order, seg);
+	ret = hns_roce_buddy_alloc(buddy, order, seg);
	if (ret == -1)
		return -1;

-	if (hns_roce_table_get_range(hr_dev, &mr_table->mtt_table, *seg,
+	if (hns_roce_table_get_range(hr_dev, table, *seg,
				     *seg + (1 << order) - 1)) {
-		hns_roce_buddy_free(&mr_table->mtt_buddy, *seg, order);
+		hns_roce_buddy_free(buddy, *seg, order);
		return -1;
	}

@@ -196,7 +208,7 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
 int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
		      struct hns_roce_mtt *mtt)
 {
-	int ret = 0;
+	int ret;
	int i;

	/* Page num is zero, correspond to DMA memory register */
@@ -215,7 +227,8 @@ int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
		++mtt->order;

	/* Allocate MTT entry */
-	ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg);
+	ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
+				       mtt->mtt_type);
	if (ret == -1)
		return -ENOMEM;

@@ -229,18 +242,261 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
	if (mtt->order < 0)
		return;

-	hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
-	hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg,
-				 mtt->first_seg + (1 << mtt->order) - 1);
+	if (mtt->mtt_type == MTT_TYPE_WQE) {
+		hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
+				    mtt->order);
+		hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
+					 mtt->first_seg,
+					 mtt->first_seg + (1 << mtt->order) - 1);
+	} else {
+		hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
+				    mtt->order);
+		hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
+					 mtt->first_seg,
+					 mtt->first_seg + (1 << mtt->order) - 1);
+	}
+}
+EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
+
+static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
+			       struct hns_roce_mr *mr, int err_loop_index,
+			       int loop_i, int loop_j)
+{
+	struct device *dev = hr_dev->dev;
+	u32 mhop_num;
+	u32 pbl_bt_sz;
+	u64 bt_idx;
+	int i, j;
+
+	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
+	mhop_num = hr_dev->caps.pbl_hop_num;
+
+	i = loop_i;
+	if (mhop_num == 3 && err_loop_index == 2) {
+		for (; i >= 0; i--) {
+			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
+					  mr->pbl_l1_dma_addr[i]);
+
+			for (j = 0; j < pbl_bt_sz / 8; j++) {
+				if (i == loop_i && j >= loop_j)
+					break;
+
+				bt_idx = i * pbl_bt_sz / 8 + j;
+				dma_free_coherent(dev, pbl_bt_sz,
+						  mr->pbl_bt_l2[bt_idx],
+						  mr->pbl_l2_dma_addr[bt_idx]);
+			}
+		}
+	} else if (mhop_num == 3 && err_loop_index == 1) {
+		for (i -= 1; i >= 0; i--) {
+			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
+					  mr->pbl_l1_dma_addr[i]);
+
+			for (j = 0; j < pbl_bt_sz / 8; j++) {
+				bt_idx = i * pbl_bt_sz / 8 + j;
+				dma_free_coherent(dev, pbl_bt_sz,
+						  mr->pbl_bt_l2[bt_idx],
+						  mr->pbl_l2_dma_addr[bt_idx]);
+			}
+		}
+	} else if (mhop_num == 2 && err_loop_index == 1) {
+		for (i -= 1; i >= 0; i--)
+			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
+					  mr->pbl_l1_dma_addr[i]);
+	} else {
+		dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.",
+			 mhop_num, err_loop_index);
+		return;
+	}
+
+	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
+	mr->pbl_bt_l0 = NULL;
+	mr->pbl_l0_dma_addr = 0;
+}
+
+/* PBL multi hop addressing */
+static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
+			       struct hns_roce_mr *mr)
+{
+	struct device *dev = hr_dev->dev;
+	int mr_alloc_done = 0;
+	int npages_allocated;
+	int i = 0, j = 0;
+	u32 pbl_bt_sz;
+	u32 mhop_num;
+	u64 pbl_last_bt_num;
+	u64 pbl_bt_cnt = 0;
+	u64 bt_idx;
+	u64 size;
+
+	mhop_num = hr_dev->caps.pbl_hop_num;
+	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
+	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
+
+	if (mhop_num == HNS_ROCE_HOP_NUM_0)
+		return 0;
+
+	/* hop_num = 1 */
+	if (mhop_num == 1) {
+		if (npages > pbl_bt_sz / 8) {
+			dev_err(dev, "npages %d is larger than buf_pg_sz!",
+				npages);
+			return -EINVAL;
+		}
+		mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+						 &(mr->pbl_dma_addr),
+						 GFP_KERNEL);
+		if (!mr->pbl_buf)
+			return -ENOMEM;
+
+		mr->pbl_size = npages;
+		mr->pbl_ba = mr->pbl_dma_addr;
+		mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+		mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+		mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+		return 0;
+	}
+
+	mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
+				      sizeof(*mr->pbl_l1_dma_addr),
+				      GFP_KERNEL);
+	if (!mr->pbl_l1_dma_addr)
+		return -ENOMEM;
+
+	mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
+				GFP_KERNEL);
+	if (!mr->pbl_bt_l1)
+		goto err_kcalloc_bt_l1;
+
+	if (mhop_num == 3) {
+		mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
+					      sizeof(*mr->pbl_l2_dma_addr),
+					      GFP_KERNEL);
+		if (!mr->pbl_l2_dma_addr)
+			goto err_kcalloc_l2_dma;
+
+		mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
+					sizeof(*mr->pbl_bt_l2),
+					GFP_KERNEL);
+		if (!mr->pbl_bt_l2)
+			goto err_kcalloc_bt_l2;
+	}
+
+	/* alloc L0 BT */
+	mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
+					   &(mr->pbl_l0_dma_addr),
+					   GFP_KERNEL);
+	if (!mr->pbl_bt_l0)
+		goto err_dma_alloc_l0;
+
+	if (mhop_num == 2) {
+		/* alloc L1 BT */
+		for (i = 0; i < pbl_bt_sz / 8; i++) {
+			if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
+				size = pbl_bt_sz;
+			} else {
+				npages_allocated = i * (pbl_bt_sz / 8);
+				size = (npages - npages_allocated) * 8;
+			}
+			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
+						    &(mr->pbl_l1_dma_addr[i]),
+						    GFP_KERNEL);
+			if (!mr->pbl_bt_l1[i]) {
+				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+				goto err_dma_alloc_l0;
+			}
+
+			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+
+			pbl_bt_cnt++;
+			if (pbl_bt_cnt >= pbl_last_bt_num)
+				break;
+		}
+	} else if (mhop_num == 3) {
+		/* alloc L1, L2 BT */
+		for (i = 0; i < pbl_bt_sz / 8; i++) {
+			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
+						    &(mr->pbl_l1_dma_addr[i]),
+						    GFP_KERNEL);
+			if (!mr->pbl_bt_l1[i]) {
+				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
+				goto err_dma_alloc_l0;
+			}
+
+			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
+
+			for (j = 0; j < pbl_bt_sz / 8; j++) {
+				bt_idx = i * pbl_bt_sz / 8 + j;
+
+				if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
+					size = pbl_bt_sz;
+				} else {
+					npages_allocated = bt_idx *
+							   (pbl_bt_sz / 8);
+					size = (npages - npages_allocated) * 8;
+				}
+				mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
+					      dev, size,
+					      &(mr->pbl_l2_dma_addr[bt_idx]),
+					      GFP_KERNEL);
+				if (!mr->pbl_bt_l2[bt_idx]) {
+					hns_roce_loop_free(hr_dev, mr, 2, i, j);
+					goto err_dma_alloc_l0;
+				}
+
+				*(mr->pbl_bt_l1[i] + j) =
+					mr->pbl_l2_dma_addr[bt_idx];
+
+				pbl_bt_cnt++;
+				if (pbl_bt_cnt >= pbl_last_bt_num) {
+					mr_alloc_done = 1;
+					break;
+				}
+			}
+
+			if (mr_alloc_done)
+				break;
+		}
+	}
+
+	mr->l0_chunk_last_num = i + 1;
+	if (mhop_num == 3)
+		mr->l1_chunk_last_num = j + 1;
+
+	mr->pbl_size = npages;
+	mr->pbl_ba = mr->pbl_l0_dma_addr;
+	mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+	mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+	mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+
+	return 0;
+
+err_dma_alloc_l0:
+	kfree(mr->pbl_bt_l2);
+	mr->pbl_bt_l2 = NULL;
+
+err_kcalloc_bt_l2:
+	kfree(mr->pbl_l2_dma_addr);
+	mr->pbl_l2_dma_addr = NULL;
+
+err_kcalloc_l2_dma:
+	kfree(mr->pbl_bt_l1);
+	mr->pbl_bt_l1 = NULL;
+
+err_kcalloc_bt_l1:
+	kfree(mr->pbl_l1_dma_addr);
+	mr->pbl_l1_dma_addr = NULL;
+
+	return -ENOMEM;
 }

 static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
			     u64 size, u32 access, int npages,
			     struct hns_roce_mr *mr)
 {
+	struct device *dev = hr_dev->dev;
	unsigned long index = 0;
	int ret = 0;
-	struct device *dev = &hr_dev->pdev->dev;

	/* Allocate a key for mr from mr_table */
	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
@@ -258,22 +514,117 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
		mr->type = MR_TYPE_DMA;
		mr->pbl_buf = NULL;
		mr->pbl_dma_addr = 0;
+		/* PBL multi-hop addressing parameters */
+		mr->pbl_bt_l2 = NULL;
+		mr->pbl_bt_l1 = NULL;
+		mr->pbl_bt_l0 = NULL;
+		mr->pbl_l2_dma_addr = NULL;
+		mr->pbl_l1_dma_addr = NULL;
+		mr->pbl_l0_dma_addr = 0;
	} else {
		mr->type = MR_TYPE_MR;
-		mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
-						 &(mr->pbl_dma_addr),
-						 GFP_KERNEL);
-		if (!mr->pbl_buf)
-			return -ENOMEM;
+		if (!hr_dev->caps.pbl_hop_num) {
+			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+							 &(mr->pbl_dma_addr),
+							 GFP_KERNEL);
+			if (!mr->pbl_buf)
+				return -ENOMEM;
+		} else {
+			ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
+		}
	}

-	return 0;
+	return ret;
+}
+
+static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
+			       struct hns_roce_mr *mr)
+{
+	struct device *dev = hr_dev->dev;
+	int npages_allocated;
+	int npages;
+	int i, j;
+	u32 pbl_bt_sz;
+	u32 mhop_num;
+	u64 bt_idx;
+
+	npages = ib_umem_page_count(mr->umem);
+	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
+	mhop_num = hr_dev->caps.pbl_hop_num;
+
+	if (mhop_num == HNS_ROCE_HOP_NUM_0)
+		return;
+
+	/* hop_num = 1 */
+	if (mhop_num == 1) {
+		dma_free_coherent(dev, (unsigned int)(npages * 8),
+				  mr->pbl_buf, mr->pbl_dma_addr);
+		return;
+	}
+
+	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
+			  mr->pbl_l0_dma_addr);
+
+	if (mhop_num == 2) {
+		for (i = 0; i < mr->l0_chunk_last_num; i++) {
+			if (i == mr->l0_chunk_last_num - 1) {
+				npages_allocated = i * (pbl_bt_sz / 8);
+
+				dma_free_coherent(dev,
+					(npages - npages_allocated) * 8,
+					mr->pbl_bt_l1[i],
+					mr->pbl_l1_dma_addr[i]);
+
+				break;
+			}
+
+			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
+					  mr->pbl_l1_dma_addr[i]);
+		}
+	} else if (mhop_num == 3) {
+		for (i = 0; i < mr->l0_chunk_last_num; i++) {
+			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
+					  mr->pbl_l1_dma_addr[i]);
+
+			for (j = 0; j < pbl_bt_sz / 8; j++) {
+				bt_idx = i * (pbl_bt_sz / 8) + j;
+
+				if ((i == mr->l0_chunk_last_num - 1)
+				    && j == mr->l1_chunk_last_num - 1) {
+					npages_allocated = bt_idx *
+							   (pbl_bt_sz / 8);
+
+					dma_free_coherent(dev,
+						(npages - npages_allocated) * 8,
+						mr->pbl_bt_l2[bt_idx],
+						mr->pbl_l2_dma_addr[bt_idx]);
+
+					break;
+				}
+
+				dma_free_coherent(dev, pbl_bt_sz,
+						  mr->pbl_bt_l2[bt_idx],
+						  mr->pbl_l2_dma_addr[bt_idx]);
+			}
+		}
+	}
+
+	kfree(mr->pbl_bt_l1);
+	kfree(mr->pbl_l1_dma_addr);
+	mr->pbl_bt_l1 = NULL;
+	mr->pbl_l1_dma_addr = NULL;
+	if (mhop_num == 3) {
+		kfree(mr->pbl_bt_l2);
+		kfree(mr->pbl_l2_dma_addr);
+		mr->pbl_bt_l2 = NULL;
+		mr->pbl_l2_dma_addr = NULL;
+	}
 }

 static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mr *mr)
 {
-	struct device *dev = &hr_dev->pdev->dev;
+	struct device *dev = hr_dev->dev;
	int npages = 0;
	int ret;

@@ -286,10 +637,18 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,

	if (mr->size != ~0ULL) {
		npages = ib_umem_page_count(mr->umem);
-		dma_free_coherent(dev, (unsigned int)(npages * 8), mr->pbl_buf,
-				  mr->pbl_dma_addr);
+
+		if (!hr_dev->caps.pbl_hop_num)
+			dma_free_coherent(dev, (unsigned int)(npages * 8),
+					  mr->pbl_buf, mr->pbl_dma_addr);
+		else
+			hns_roce_mhop_free(hr_dev, mr);
	}

+	if (mr->enabled)
+		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
+				   key_to_hw_index(mr->key));
+
	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
			     key_to_hw_index(mr->key), BITMAP_NO_RR);
 }
@@ -299,7 +658,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
 {
	int ret;
	unsigned long mtpt_idx = key_to_hw_index(mr->key);
-	struct device *dev = &hr_dev->pdev->dev;
+	struct device *dev = hr_dev->dev;
	struct hns_roce_cmd_mailbox *mailbox;
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

@@ -345,28 +704,44 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
				    struct hns_roce_mtt *mtt, u32 start_index,
				    u32 npages, u64 *page_list)
 {
-	u32 i = 0;
-	__le64 *mtts = NULL;
+	struct hns_roce_hem_table *table;
	dma_addr_t dma_handle;
+	__le64 *mtts;
	u32 s = start_index * sizeof(u64);
+	u32 bt_page_size;
+	u32 i;
+
+	if (mtt->mtt_type == MTT_TYPE_WQE)
+		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
+	else
+		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);

	/* All MTTs must fit in the same page */
-	if (start_index / (PAGE_SIZE / sizeof(u64)) !=
-	    (start_index + npages - 1) / (PAGE_SIZE / sizeof(u64)))
+	if (start_index / (bt_page_size / sizeof(u64)) !=
+	    (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
		return -EINVAL;

	if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
		return -EINVAL;

-	mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
+	if (mtt->mtt_type == MTT_TYPE_WQE)
+		table = &hr_dev->mr_table.mtt_table;
+	else
+		table = &hr_dev->mr_table.mtt_cqe_table;
+
+	mtts = hns_roce_table_find(hr_dev, table,
				   mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
				   &dma_handle);
	if (!mtts)
		return -ENOMEM;

	/* Save page addr, low 12 bits : 0 */
-	for (i = 0; i < npages; ++i)
-		mtts[i] = (cpu_to_le64(page_list[i])) >> PAGE_ADDR_SHIFT;
+	for (i = 0; i < npages; ++i) {
+		if (!hr_dev->caps.mtt_hop_num)
+			mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
+		else
+			mtts[i] = cpu_to_le64(page_list[i]);
+	}

	return 0;
 }
@@ -377,12 +752,18 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
 {
	int chunk;
	int ret;
+	u32 bt_page_size;

	if (mtt->order < 0)
		return -EINVAL;

+	if (mtt->mtt_type == MTT_TYPE_WQE)
+		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
+	else
+		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+
	while (npages > 0) {
-		chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
+		chunk = min_t(int, bt_page_size / sizeof(u64), npages);

		ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
					       page_list);
@@ -400,9 +781,9 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
 int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
			   struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
 {
-	u32 i = 0;
-	int ret = 0;
-	u64 *page_list = NULL;
+	u64 *page_list;
+	int ret;
+	u32 i;

	page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
	if (!page_list)
@@ -425,7 +806,7 @@ int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
 int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
 {
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-	int ret = 0;
+	int ret;

	ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
				   hr_dev->caps.num_mtpts,
@@ -439,8 +820,17 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
	if (ret)
		goto err_buddy;

+	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
+		ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
+					  ilog2(hr_dev->caps.num_cqe_segs));
+		if (ret)
+			goto err_buddy_cqe;
+	}
	return 0;

+err_buddy_cqe:
+	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
+
 err_buddy:
	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
	return ret;
@@ -451,13 +841,15 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;

	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
+	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
+		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
 }

 struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
 {
-	int ret = 0;
-	struct hns_roce_mr *mr = NULL;
+	struct hns_roce_mr *mr;
+	int ret;

	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
	if (mr == NULL)
@@ -489,25 +881,44 @@ err_free:
 int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
			       struct hns_roce_mtt *mtt, struct ib_umem *umem)
 {
+	struct device *dev = hr_dev->dev;
	struct scatterlist *sg;
+	unsigned int order;
	int i, k, entry;
+	int npage = 0;
	int ret = 0;
+	int len;
+	u64 page_addr;
	u64 *pages;
+	u32 bt_page_size;
	u32 n;
-	int len;

-	pages = (u64 *) __get_free_page(GFP_KERNEL);
+	order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
+		hr_dev->caps.cqe_ba_pg_sz;
+	bt_page_size = 1 << (order + PAGE_SHIFT);
+
+	pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
	if (!pages)
		return -ENOMEM;

	i = n = 0;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> mtt->page_shift;
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
		for (k = 0; k < len; ++k) {
-			pages[i++] = sg_dma_address(sg) +
-				(k << umem->page_shift);
-			if (i == PAGE_SIZE / sizeof(u64)) {
+			page_addr =
+				sg_dma_address(sg) + (k << umem->page_shift);
+			if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
+				if (page_addr & ((1 << mtt->page_shift) - 1)) {
+					dev_err(dev, "page_addr 0x%llx is not page_shift %d alignment!\n",
+						page_addr, mtt->page_shift);
+					ret = -EINVAL;
+					goto out;
+				}
+				pages[i++] = page_addr;
+			}
+			npage++;
+			if (i == bt_page_size / sizeof(u64)) {
				ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
							 pages);
				if (ret)
@@ -526,16 +937,44 @@ out:
	return ret;
 }

-static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr,
+static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
+				     struct hns_roce_mr *mr,
				     struct ib_umem *umem)
 {
-	int i = 0;
-	int entry;
	struct scatterlist *sg;
+	int i = 0, j = 0, k;
+	int entry;
+	int len;
+	u64 page_addr;
+	u32 pbl_bt_sz;
+
+	if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
+		return 0;

+	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12;
-		i++;
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
+		for (k = 0; k < len; ++k) {
+			page_addr = sg_dma_address(sg) +
+				    (k << umem->page_shift);
+
+			if (!hr_dev->caps.pbl_hop_num) {
+				mr->pbl_buf[i++] = page_addr >> 12;
+			} else if (hr_dev->caps.pbl_hop_num == 1) {
+				mr->pbl_buf[i++] = page_addr;
+			} else {
+				if (hr_dev->caps.pbl_hop_num == 2)
+					mr->pbl_bt_l1[i][j] = page_addr;
+				else if (hr_dev->caps.pbl_hop_num == 3)
+					mr->pbl_bt_l2[i][j] = page_addr;
+
+				j++;
+				if (j >= (pbl_bt_sz / 8)) {
+					i++;
+					j = 0;
+				}
+			}
+		}
	}

	/* Memory barrier */
@@ -549,10 +988,12 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				   struct ib_udata *udata)
 {
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
-	struct device *dev = &hr_dev->pdev->dev;
-	struct hns_roce_mr *mr = NULL;
-	int ret = 0;
-	int n = 0;
+	struct device *dev = hr_dev->dev;
+	struct hns_roce_mr *mr;
+	int bt_size;
+	int ret;
+	int n;
+	int i;

	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
@@ -573,11 +1014,27 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
		goto err_umem;
	}

-	if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
-		dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n",
-			length);
-		ret = -EINVAL;
-		goto err_umem;
+	if (!hr_dev->caps.pbl_hop_num) {
+		if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
+			dev_err(dev,
+				" MR len %lld err. MR is limited to 4G at most!\n",
+				length);
+			ret = -EINVAL;
+			goto err_umem;
+		}
+	} else {
+		int pbl_size = 1;
+
+		bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
+		for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
+			pbl_size *= bt_size;
+		if (n > pbl_size) {
+			dev_err(dev,
+				" MR len %lld err. MR page num is limited to %d!\n",
+				length, pbl_size);
+			ret = -EINVAL;
+			goto err_umem;
+		}
	}

	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
@@ -585,7 +1042,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
	if (ret)
		goto err_umem;

-	ret = hns_roce_ib_umem_write_mr(mr, mr->umem);
+	ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
	if (ret)
		goto err_mr;

@@ -608,6 +1065,129 @@ err_free:
	return ERR_PTR(ret);
 }

+int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
+			   u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
+			   struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+	struct hns_roce_cmd_mailbox *mailbox;
+	struct device *dev = hr_dev->dev;
+	unsigned long mtpt_idx;
+	u32 pdn = 0;
+	int npages;
+	int ret;
+
+	if (!mr->enabled)
+		return -EINVAL;
+
+	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
+	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
+				HNS_ROCE_CMD_QUERY_MPT,
+				HNS_ROCE_CMD_TIMEOUT_MSECS);
+	if (ret)
+		goto free_cmd_mbox;
+
+	ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
+	if (ret)
+		dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+
+	mr->enabled = 0;
+
+	if (flags & IB_MR_REREG_PD)
+		pdn = to_hr_pd(pd)->pdn;
+
+	if (flags & IB_MR_REREG_TRANS) {
+		if (mr->size != ~0ULL) {
+			npages = ib_umem_page_count(mr->umem);
+
+			if (hr_dev->caps.pbl_hop_num)
+				hns_roce_mhop_free(hr_dev, mr);
+			else
+				dma_free_coherent(dev, npages * 8, mr->pbl_buf,
+						  mr->pbl_dma_addr);
+		}
+		ib_umem_release(mr->umem);
+
+		mr->umem = ib_umem_get(ibmr->uobject->context, start, length,
+				       mr_access_flags, 0);
+		if (IS_ERR(mr->umem)) {
+			ret = PTR_ERR(mr->umem);
+			mr->umem = NULL;
+			goto free_cmd_mbox;
+		}
+		npages = ib_umem_page_count(mr->umem);
+
+		if (hr_dev->caps.pbl_hop_num) {
+			ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
+			if (ret)
+				goto release_umem;
+		} else {
+			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
+							 &(mr->pbl_dma_addr),
+							 GFP_KERNEL);
+			if (!mr->pbl_buf) {
+				ret = -ENOMEM;
+				goto release_umem;
+			}
+		}
+	}
+
+	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+					   mr_access_flags, virt_addr,
+					   length, mailbox->buf);
+	if (ret) {
+		if (flags & IB_MR_REREG_TRANS)
+			goto release_umem;
+		else
+			goto free_cmd_mbox;
+	}
+
+	if (flags & IB_MR_REREG_TRANS) {
+		ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
+		if (ret) {
+			if (mr->size != ~0ULL) {
+				npages = ib_umem_page_count(mr->umem);
+
+				if (hr_dev->caps.pbl_hop_num)
+					hns_roce_mhop_free(hr_dev, mr);
+				else
+					dma_free_coherent(dev, npages * 8,
+							  mr->pbl_buf,
+							  mr->pbl_dma_addr);
+			}
+
+			goto release_umem;
+		}
+	}
+
+	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
+	if (ret) {
+		dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
+		goto release_umem;
+	}
+
+	mr->enabled = 1;
+	if (flags & IB_MR_REREG_ACCESS)
+		mr->access = mr_access_flags;
+
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+	return 0;
+
+release_umem:
+	ib_umem_release(mr->umem);
+
+free_cmd_mbox:
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+	return ret;
+}
+
 int hns_roce_dereg_mr(struct ib_mr *ibmr)
 {
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
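
Note on the new PBL capacity check: with multi-hop addressing enabled, hns_roce_reg_user_mr() above bounds the number of MR pages by bt_size raised to the power of pbl_hop_num, where bt_size is the number of 8-byte entries in one base-address-table (BT) page. The stand-alone sketch below is not part of the patch; max_pbl_pages(), EX_PAGE_SHIFT and the example capability values are illustrative assumptions (real values come from hr_dev->caps), but the arithmetic mirrors the loop added in hns_roce_reg_user_mr().

/*
 * Illustrative user-space sketch of the multi-hop PBL capacity check.
 * Assumes 4 KB system pages; example values stand in for hr_dev->caps.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT 12	/* 4 KB system pages (assumption) */

/*
 * Maximum MR pages addressable: bottom-level BTs hold one 8-byte page
 * address per entry, and each additional hop multiplies capacity by the
 * number of entries in one BT page.
 */
static uint64_t max_pbl_pages(uint32_t pbl_ba_pg_sz, uint32_t pbl_hop_num)
{
	uint64_t bt_entries = (1ULL << (pbl_ba_pg_sz + EX_PAGE_SHIFT)) / 8;
	uint64_t pbl_size = 1;
	uint32_t i;

	for (i = 0; i < pbl_hop_num; i++)
		pbl_size *= bt_entries;	/* same loop as in hns_roce_reg_user_mr() */

	return pbl_size;
}

int main(void)
{
	/* Example: 4 KB BT pages (pbl_ba_pg_sz = 0), two hops. */
	uint64_t pages = max_pbl_pages(0, 2);

	printf("max MR pages with 2 hops: %llu\n",
	       (unsigned long long)pages);
	return 0;
}

For example, with 4 KB BT pages and pbl_hop_num = 2, one BT page holds 512 entries, so a region may span up to 512 * 512 = 262144 pages, i.e. 1 GB of 4 KB pages; a third hop multiplies that by another 512.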