From eeb8461e36c99fdf2d058751be924a2aab215005 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 28 Jan 2014 13:40:15 +0200 Subject: IB: Refactor umem to use linear SG table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch refactors the IB core umem code and vendor drivers to use a linear (chained) SG table instead of chunk list. With this change the relevant code becomes clearer—no need for nested loops to build and use umem. Signed-off-by: Shachar Raindel Signed-off-by: Yishai Hadas Signed-off-by: Roland Dreier --- drivers/infiniband/core/umem.c | 120 ++++++------- drivers/infiniband/hw/amso1100/c2_provider.c | 23 +-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 19 +- drivers/infiniband/hw/cxgb4/mem.c | 39 ++--- drivers/infiniband/hw/ehca/ehca_classes.h | 2 +- drivers/infiniband/hw/ehca/ehca_mrmw.c | 245 ++++++++++---------------- drivers/infiniband/hw/ipath/ipath_mr.c | 39 ++--- drivers/infiniband/hw/mlx4/doorbell.c | 4 +- drivers/infiniband/hw/mlx4/mr.c | 39 ++--- drivers/infiniband/hw/mlx5/doorbell.c | 4 +- drivers/infiniband/hw/mlx5/mem.c | 80 ++++----- drivers/infiniband/hw/mthca/mthca_provider.c | 42 ++--- drivers/infiniband/hw/nes/nes_verbs.c | 253 ++++++++++++++------------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 66 ++++--- drivers/infiniband/hw/qib/qib_mr.c | 14 +- include/rdma/ib_umem.h | 11 +- 16 files changed, 448 insertions(+), 552 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a84112322071..a3a2e9c1639b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -42,29 +42,29 @@ #include "uverbs.h" -#define IB_UMEM_MAX_PAGE_CHUNK \ - ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ - ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ - (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { - struct ib_umem_chunk *chunk, *tmp; + struct scatterlist *sg; + struct page *page; int i; - list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { - ib_dma_unmap_sg(dev, chunk->page_list, - chunk->nents, DMA_BIDIRECTIONAL); - for (i = 0; i < chunk->nents; ++i) { - struct page *page = sg_page(&chunk->page_list[i]); + if (umem->nmap > 0) + ib_dma_unmap_sg(dev, umem->sg_head.sgl, + umem->nmap, + DMA_BIDIRECTIONAL); - if (umem->writable && dirty) - set_page_dirty_lock(page); - put_page(page); - } + for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { - kfree(chunk); + page = sg_page(sg); + if (umem->writable && dirty) + set_page_dirty_lock(page); + put_page(page); } + + sg_free_table(&umem->sg_head); + return; + } /** @@ -81,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; - struct ib_umem_chunk *chunk; unsigned long locked; unsigned long lock_limit; unsigned long cur_base; unsigned long npages; int ret; - int off; int i; DEFINE_DMA_ATTRS(attrs); + struct scatterlist *sg, *sg_list_start; + int need_release = 0; if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -97,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (!can_do_mlock()) return ERR_PTR(-EPERM); - umem = kmalloc(sizeof *umem, GFP_KERNEL); + umem = kzalloc(sizeof *umem, GFP_KERNEL); if (!umem) return ERR_PTR(-ENOMEM); @@ -117,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, /* We assume the memory is from hugetlb until proved otherwise */ umem->hugetlb = 1; - INIT_LIST_HEAD(&umem->chunk_list); - page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { kfree(umem); @@ -147,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, cur_base = addr & PAGE_MASK; - ret = 0; + if (npages == 0) { + ret = -EINVAL; + goto out; + } + + ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); + if (ret) + goto out; + + need_release = 1; + sg_list_start = umem->sg_head.sgl; + while (npages) { ret = get_user_pages(current, current->mm, cur_base, min_t(unsigned long, npages, @@ -157,54 +166,38 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (ret < 0) goto out; + umem->npages += ret; cur_base += ret * PAGE_SIZE; npages -= ret; - off = 0; - - while (ret) { - chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * - min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), - GFP_KERNEL); - if (!chunk) { - ret = -ENOMEM; - goto out; - } - - chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); - sg_init_table(chunk->page_list, chunk->nents); - for (i = 0; i < chunk->nents; ++i) { - if (vma_list && - !is_vm_hugetlb_page(vma_list[i + off])) - umem->hugetlb = 0; - sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0); - } - - chunk->nmap = ib_dma_map_sg_attrs(context->device, - &chunk->page_list[0], - chunk->nents, - DMA_BIDIRECTIONAL, - &attrs); - if (chunk->nmap <= 0) { - for (i = 0; i < chunk->nents; ++i) - put_page(sg_page(&chunk->page_list[i])); - kfree(chunk); - - ret = -ENOMEM; - goto out; - } - - ret -= chunk->nents; - off += chunk->nents; - list_add_tail(&chunk->list, &umem->chunk_list); + for_each_sg(sg_list_start, sg, ret, i) { + if (vma_list && !is_vm_hugetlb_page(vma_list[i])) + umem->hugetlb = 0; + + sg_set_page(sg, page_list[i], PAGE_SIZE, 0); } - ret = 0; + /* preparing for next loop */ + sg_list_start = sg; } + umem->nmap = ib_dma_map_sg_attrs(context->device, + umem->sg_head.sgl, + umem->npages, + DMA_BIDIRECTIONAL, + &attrs); + + if (umem->nmap <= 0) { + ret = -ENOMEM; + goto out; + } + + ret = 0; + out: if (ret < 0) { - __ib_umem_release(context->device, umem, 0); + if (need_release) + __ib_umem_release(context->device, umem, 0); kfree(umem); } else current->mm->pinned_vm = locked; @@ -278,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { - struct ib_umem_chunk *chunk; int shift; int i; int n; + struct scatterlist *sg; shift = ilog2(umem->page_size); n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (i = 0; i < chunk->nmap; ++i) - n += sg_dma_len(&chunk->page_list[i]) >> shift; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) + n += sg_dma_len(sg) >> shift; return n; } diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 07eb3a8067d8..8af33cf1fc4e 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 *pages; u64 kva = 0; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c2_pd *c2pd = to_c2pd(pd); struct c2_mr *c2mr; @@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(c2mr->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) - n += chunk->nents; + n = c2mr->umem->nmap; pages = kmalloc(n * sizeof(u64), GFP_KERNEL); if (!pages) { @@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } i = 0; - list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) { - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = - sg_dma_address(&chunk->page_list[j]) + - (c2mr->umem->page_size * k); - } + for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = + sg_dma_address(sg) + + (c2mr->umem->page_size * k); } } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index d2283837d451..811b24a539c0 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; - + struct scatterlist *sg; PDBG("%s ib_pd %p\n", __func__, pd); php = to_iwch_pd(pd); @@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; + n = mhp->umem->nmap; err = iwch_alloc_pbl(mhp, n); if (err) @@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + + pages[i++] = cpu_to_be64(sg_dma_address(sg) + mhp->umem->page_size * k); if (i == PAGE_SIZE / sizeof *pages) { err = iwch_write_pbl(mhp, pages, i, n); @@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = 0; } } - } + } if (i) err = iwch_write_pbl(mhp, pages, i, n); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 41b11951a30a..392d422b00cb 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -678,9 +678,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -710,10 +710,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; - + n = mhp->umem->nmap; err = alloc_pbl(mhp, n); if (err) goto err; @@ -726,24 +723,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + - mhp->umem->page_size * k); - if (i == PAGE_SIZE / sizeof *pages) { - err = write_pbl(&mhp->rhp->rdev, - pages, - mhp->attr.pbl_addr + (n << 3), i); - if (err) - goto pbl_done; - n += i; - i = 0; - } + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; } } + } if (i) err = write_pbl(&mhp->rhp->rdev, pages, diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index f08f6eaf3fa8..bd45e0f3923f 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -322,7 +322,7 @@ struct ehca_mr_pginfo { } phy; struct { /* type EHCA_MR_PGI_USER section */ struct ib_umem *region; - struct ib_umem_chunk *next_chunk; + struct scatterlist *next_sg; u64 next_nmap; } usr; struct { /* type EHCA_MR_PGI_FMR section */ diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index bcfb0c183620..7168f594d457 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -400,10 +400,7 @@ reg_user_mr_fallback: pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; - pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, - (&e_mr->umem->chunk_list), - list); - + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); @@ -1858,61 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr; - u32 i = 0; u32 j = 0; int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ; - *kpage = pgaddr + (pginfo->next_hwpage * - pginfo->hwpage_size); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx " - "chunk->page_list[i]=%llx " - "i=%x next_hwpage=%llx", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[i]), - i, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - i++; - } - j++; - if (j >= number) break; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1920,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, * check given pages for contiguous layout * last page addr is returned in prev_pgaddr for further check */ -static int ehca_check_kpages_per_ate(struct scatterlist *page_list, - int start_idx, int end_idx, +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, u64 *prev_pgaddr) { - int t; - for (t = start_idx; t <= end_idx; t++) { - u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; if (ehca_debug_level >= 3) ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, *(u64 *)__va(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx page_list_i=%x", - pgaddr, *prev_pgaddr, t); + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); return -EINVAL; } *prev_pgaddr = pgaddr; @@ -1947,111 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr, prev_pgaddr; - u32 i = 0; u32 j = 0; int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - if (nr_kpages == kpages_per_hwpage) { - pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ); - *kpage = pgaddr; - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx i=%x", - pgaddr, i); + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", + pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); ret = -EFAULT; return ret; } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx i=%x " - "mr_pgsize=%llx", - pgaddr, i, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = pgaddr & - ~(pginfo->hwpage_size - 1); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)__va(pgaddr); - ehca_gen_dbg("kpage=%llx chunk_page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - i++; - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); } - if (i + nr_kpages > chunk->nmap) { - ret = ehca_check_kpages_per_ate( - chunk->page_list, i, - chunk->nmap - 1, &prev_pgaddr); - if (ret) return ret; - pginfo->kpage_cnt += chunk->nmap - i; - pginfo->u.usr.next_nmap += chunk->nmap - i; - nr_kpages -= chunk->nmap - i; - break; + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; - ret = ehca_check_kpages_per_ate(chunk->page_list, i, - i + nr_kpages - 1, - &prev_pgaddr); - if (ret) return ret; - i += nr_kpages; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) break; - } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index e346d3890a0e..5e61e9bff697 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -188,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct ipath_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + int n, m, entry; + struct scatterlist *sg; struct ib_mr *ret; if (length == 0) { @@ -202,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; - + n = umem->nmap; mr = alloc_mr(n, &to_idev(pd->device)->lk_table); if (!mr) { ret = ERR_PTR(-ENOMEM); @@ -224,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { - void *vaddr; - - vaddr = page_address(sg_page(&chunk->page_list[i])); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == IPATH_SEGSZ) { + m++; + n = 0; } } ret = &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 8aee4233b388..c51740986367 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index e471f089ff00..cb2a8727f3fb 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k, entry; int n; int len; int err = 0; + struct scatterlist *sg; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) @@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, i = n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - umem->page_size * k; - /* - * Be friendly to mlx4_write_mtt() and - * pass it chunks of appropriate size. - */ - if (i == PAGE_SIZE / sizeof (u64)) { - err = mlx4_write_mtt(dev->dev, mtt, n, - i, pages); - if (err) - goto out; - n += i; - i = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + umem->page_size * k; + /* + * Be friendly to mlx4_write_mtt() and + * pass it chunks of appropriate size. + */ + if (i == PAGE_SIZE / sizeof (u64)) { + err = mlx4_write_mtt(dev->dev, mtt, n, + i, pages); + if (err) + goto out; + n += i; + i = 0; } } + } if (i) err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 256a23344f28..ece028fc47d6 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 3a5322870b96..8499aec94db6 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -44,16 +44,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, int *ncont, int *order) { - struct ib_umem_chunk *chunk; unsigned long tmp; unsigned long m; - int i, j, k; + int i, k; u64 base = 0; int p = 0; int skip; int mask; u64 len; u64 pfn; + struct scatterlist *sg; + int entry; addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; @@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, skip = 1 << m; mask = skip - 1; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT; - for (k = 0; k < len; k++) { - if (!(i & mask)) { - tmp = (unsigned long)pfn; - m = min(m, find_first_bit(&tmp, sizeof(tmp))); + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + pfn = sg_dma_address(sg) >> PAGE_SHIFT; + for (k = 0; k < len; k++) { + if (!(i & mask)) { + tmp = (unsigned long)pfn; + m = min(m, find_first_bit(&tmp, sizeof(tmp))); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } else { + if (base + p != pfn) { + tmp = (unsigned long)p; + m = find_first_bit(&tmp, sizeof(tmp)); skip = 1 << m; mask = skip - 1; base = pfn; p = 0; - } else { - if (base + p != pfn) { - tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); - skip = 1 << m; - mask = skip - 1; - base = pfn; - p = 0; - } } - p++; - i++; } + p++; + i++; } + } if (i) { m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); @@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, { int shift = page_shift - PAGE_SHIFT; int mask = (1 << shift) - 1; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k; u64 cur = 0; u64 base; int len; + struct scatterlist *sg; + int entry; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - base = sg_dma_address(&chunk->page_list[j]); - for (k = 0; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - if (umr) - cur |= 3; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + base = sg_dma_address(sg); + for (k = 0; k < len; k++) { + if (!(i & mask)) { + cur = base + (k << PAGE_SHIFT); + if (umr) + cur |= 3; - pas[i >> shift] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[i >> shift])); - } else - mlx5_ib_dbg(dev, "=====> 0x%llx\n", - base + (k << PAGE_SHIFT)); - i++; - } + pas[i >> shift] = cpu_to_be64(cur); + mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", + i >> shift, be64_to_cpu(pas[i >> shift])); + } else + mlx5_ib_dbg(dev, "=====> 0x%llx\n", + base + (k << PAGE_SHIFT)); + i++; } + } } int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 5b71d43bd89c..64408000f1c7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -976,12 +976,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct mthca_mr *mr; struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; int write_mtt_size; @@ -1009,10 +1009,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(mr->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - n += chunk->nents; + n = mr->umem->nmap; mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -1030,25 +1027,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - mr->umem->page_size * k; - /* - * Be friendly to write_mtt and pass it chunks - * of appropriate size. - */ - if (i == write_mtt_size) { - err = mthca_write_mtt(dev, mr->mtt, n, pages, i); - if (err) - goto mtt_done; - n += i; - i = 0; - } + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + mr->umem->page_size * k; + /* + * Be friendly to write_mtt and pass it chunks + * of appropriate size. + */ + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; } } + } if (i) err = mthca_write_mtt(dev, mr->mtt, n, pages, i); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8308e3634767..32d3682daaf5 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2307,7 +2307,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct ib_mr *ibmr = ERR_PTR(-EINVAL); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct nes_ucontext *nes_ucontext; struct nes_pbl *nespbl; struct nes_mr *nesmr; @@ -2315,7 +2315,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_mem_reg_req req; struct nes_vpbl vpbl; struct nes_root_vpbl root_vpbl; - int nmap_index, page_index; + int entry, page_index; int page_count = 0; int err, pbl_depth = 0; int chunk_pages; @@ -2330,6 +2330,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u16 pbl_count; u8 single_page = 1; u8 stag_key; + int first_page = 1; region = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(region)) { @@ -2380,128 +2381,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } nesmr->region = region; - list_for_each_entry(chunk, ®ion->chunk_list, list) { - nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n", - chunk->nents, chunk->nmap); - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", - (unsigned int) sg_dma_address(&chunk->page_list[nmap_index])); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + if (sg_dma_address(sg) & ~PAGE_MASK) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", + (unsigned int) sg_dma_address(sg)); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - if (!sg_dma_len(&chunk->page_list[nmap_index])) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + if (!sg_dma_len(sg)) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - region_length += sg_dma_len(&chunk->page_list[nmap_index]); - chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - region_length -= skip_pages << 12; - for (page_index=skip_pages; page_index < chunk_pages; page_index++) { - skip_pages = 0; - if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) - goto enough_pages; - if ((page_count&0x01FF) == 0) { - if (page_count >= 1024 * 512) { + region_length += sg_dma_len(sg); + chunk_pages = sg_dma_len(sg) >> 12; + region_length -= skip_pages << 12; + for (page_index = skip_pages; page_index < chunk_pages; page_index++) { + skip_pages = 0; + if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length) + goto enough_pages; + if ((page_count&0x01FF) == 0) { + if (page_count >= 1024 * 512) { + ib_umem_release(region); + nes_free_resource(nesadapter, + nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-E2BIG); + goto reg_user_mr_err; + } + if (root_pbl_index == 1) { + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 8192, &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { ib_umem_release(region); - nes_free_resource(nesadapter, - nesadapter->allocated_mrs, stag_index); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); - ibmr = ERR_PTR(-E2BIG); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (root_pbl_index == 1) { - root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, - 8192, &root_vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", - root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); - if (!root_vpbl.pbl_vbase) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, - GFP_KERNEL); - if (!root_vpbl.leaf_vpbl) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.pbl_vbase[0].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[0].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[0] = vpbl; - } - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", - vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); - if (!vpbl.pbl_vbase) { + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, + GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (1 <= root_pbl_index) { - root_vpbl.pbl_vbase[root_pbl_index].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[root_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); - root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; - } - root_pbl_index++; - cur_pbl_index = 0; + root_vpbl.pbl_vbase[0].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; } - if (single_page) { - if (page_count != 0) { - if ((last_dma_addr+4096) != - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) - single_page = 0; - last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - } else { - first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - last_dma_addr = first_dma_addr; - } + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", + vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_user_mr_err; + } + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; + } + if (single_page) { + if (page_count != 0) { + if ((last_dma_addr+4096) != + (sg_dma_address(sg)+ + (page_index*4096))) + single_page = 0; + last_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + } else { + first_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + last_dma_addr = first_dma_addr; } - - vpbl.pbl_vbase[cur_pbl_index].pa_low = - cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))); - vpbl.pbl_vbase[cur_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) >> 32))); - cur_pbl_index++; - page_count++; } + + vpbl.pbl_vbase[cur_pbl_index].pa_low = + cpu_to_le32((u32)(sg_dma_address(sg)+ + (page_index*4096))); + vpbl.pbl_vbase[cur_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+ + (page_index*4096))) >> 32))); + cur_pbl_index++; + page_count++; } } + enough_pages: nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x," " stag_key=0x%08x\n", @@ -2613,25 +2611,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase, (void *) nespbl->pbl_vbase, nespbl->user_base); - list_for_each_entry(chunk, ®ion->chunk_list, list) { - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0; - nespbl->page = sg_page(&chunk->page_list[0]); - for (page_index=0; page_indexpage_list[nmap_index])+ - (page_index*4096))); - ((__le32 *)pbl)[1] = cpu_to_le32(((u64) - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096)))>>32); - nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, - (unsigned long long)*pbl, - le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); - pbl++; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + chunk_pages = sg_dma_len(sg) >> 12; + chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0; + if (first_page) { + nespbl->page = sg_page(sg); + first_page = 0; + } + + for (page_index = 0; page_index < chunk_pages; page_index++) { + ((__le32 *)pbl)[0] = cpu_to_le32((u32) + (sg_dma_address(sg)+ + (page_index*4096))); + ((__le32 *)pbl)[1] = cpu_to_le32(((u64) + (sg_dma_address(sg)+ + (page_index*4096)))>>32); + nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, + (unsigned long long)*pbl, + le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); + pbl++; } } + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); } else { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e0cc201be41a..0de3473fa7d9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -726,10 +726,10 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, u32 num_pbes) { struct ocrdma_pbe *pbe; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; struct ib_umem *umem = mr->umem; - int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0; if (!mr->hwmr.num_pbes) return; @@ -739,39 +739,37 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, shift = ilog2(umem->page_size); - list_for_each_entry(chunk, &umem->chunk_list, list) { - /* get all the dma regions from the chunk. */ - for (i = 0; i < chunk->nmap; i++) { - pages = sg_dma_len(&chunk->page_list[i]) >> shift; - for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { - /* store the page address in pbe */ - pbe->pa_lo = - cpu_to_le32(sg_dma_address - (&chunk->page_list[i]) + - (umem->page_size * pg_cnt)); - pbe->pa_hi = - cpu_to_le32(upper_32_bits - ((sg_dma_address - (&chunk->page_list[i]) + - umem->page_size * pg_cnt))); - pbe_cnt += 1; - total_num_pbes += 1; - pbe++; - - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; - - /* if the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == - (mr->hwmr.pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - pbe_cnt = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->pa_lo = + cpu_to_le32(sg_dma_address + (sg) + + (umem->page_size * pg_cnt)); + pbe->pa_hi = + cpu_to_le32(upper_32_bits + ((sg_dma_address + (sg) + + umem->page_size * pg_cnt))); + pbe_cnt += 1; + total_num_pbes += 1; + pbe++; + + /* if done building pbes, issue the mbx cmd. */ + if (total_num_pbes == num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == + (mr->hwmr.pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + pbe_cnt = 0; } + } } } diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index e6687ded8210..9bbb55347cc1 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct qib_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + struct scatterlist *sg; + int n, m, entry; struct ib_mr *ret; if (length == 0) { @@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; + n = umem->nmap; mr = alloc_mr(n, pd); if (IS_ERR(mr)) { @@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.page_shift = ilog2(umem->page_size); m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { void *vaddr; - vaddr = page_address(sg_page(&chunk->page_list[i])); + vaddr = page_address(sg_page(sg)); if (!vaddr) { ret = ERR_PTR(-EINVAL); goto bail; @@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m++; n = 0; } - } } ret = &mr->ibmr; diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 9ee0d2e51b16..1ea0b65c4cfb 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -46,17 +46,12 @@ struct ib_umem { int page_size; int writable; int hugetlb; - struct list_head chunk_list; struct work_struct work; struct mm_struct *mm; unsigned long diff; -}; - -struct ib_umem_chunk { - struct list_head list; - int nents; - int nmap; - struct scatterlist page_list[0]; + struct sg_table sg_head; + int nmap; + int npages; }; #ifdef CONFIG_INFINIBAND_USER_MEM -- cgit v1.2.3 From 17cd3a2db825506c3e3bb9548ad20f67e2f8d0e7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:04 +0200 Subject: IB/core: Introduce protected memory regions This commit introduces verbs for creating/destoying memory regions which will allow new types of memory key operations such as protected memory registration. Indirect memory registration is registering several (one of more) pre-registered memory regions in a specific layout. The Indirect region may potentialy describe several regions and some repitition format between them. Protected Memory registration is registering a memory region with various data integrity attributes which will describe protection schemes that will be handled by the HCA in an offloaded manner. These memory regions will be applicable for a new REG_SIG_MR work request introduced later in this patchset. In the future these routines may replace or implement current memory regions creation routines existing today: - ib_reg_user_mr - ib_alloc_fast_reg_mr - ib_get_dma_mr - ib_dereg_mr Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 39 +++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3ac795115438..ca8ce7d097e3 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1169,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr) } EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct ib_mr *mr; + + if (!pd->device->create_mr) + return ERR_PTR(-ENOSYS); + + mr = pd->device->create_mr(pd, mr_init_attr); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_create_mr); + +int ib_destroy_mr(struct ib_mr *mr) +{ + struct ib_pd *pd; + int ret; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + pd = mr->pd; + ret = mr->device->destroy_mr(mr); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_mr); + struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct ib_mr *mr; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6793f32ccb58..cb12e6a4e553 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -461,6 +461,22 @@ int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; */ int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; +enum ib_mr_create_flags { + IB_MR_SIGNATURE_EN = 1, +}; + +/** + * ib_mr_init_attr - Memory region init attributes passed to routine + * ib_create_mr. + * @max_reg_descriptors: max number of registration descriptors that + * may be used with registration work requests. + * @flags: MR creation flags bit mask. + */ +struct ib_mr_init_attr { + int max_reg_descriptors; + u32 flags; +}; + /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. @@ -1407,6 +1423,9 @@ struct ib_device { int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); + int (*destroy_mr)(struct ib_mr *mr); + struct ib_mr * (*create_mr)(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, @@ -2250,6 +2269,25 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); */ int ib_dereg_mr(struct ib_mr *mr); + +/** + * ib_create_mr - Allocates a memory region that may be used for + * signature handover operations. + * @pd: The protection domain associated with the region. + * @mr_init_attr: memory region init attributes. + */ +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); + +/** + * ib_destroy_mr - Destroys a memory region that was created using + * ib_create_mr and removes it from HW translation tables. + * @mr: The memory region to destroy. + * + * This function can fail, if the memory region has memory windows bound to it. + */ +int ib_destroy_mr(struct ib_mr *mr); + /** * ib_alloc_fast_reg_mr - Allocates memory region usable with the * IB_WR_FAST_REG_MR send work request. -- cgit v1.2.3 From 1b01d33560e78417334c2dc673bbfac6c644424c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:05 +0200 Subject: IB/core: Introduce signature verbs API Introduce a verbs interface for signature-related operations. A signature handover operation configures the layouts of data and protection attributes both in memory and wire domains. Signature operations are: - INSERT: Generate and insert protection information when handing over data from input space to output space. - validate and STRIP: Validate protection information and remove it when handing over data from input space to output space. - validate and PASS: Validate protection information and pass it when handing over data from input space to output space. Once the signature handover opration is done, the HCA will offload data integrity generation/validation while performing the actual data transfer. Additions: 1. HCA signature capabilities in device attributes Verbs provider supporting signature handover operations fills relevant fields in device attributes structure returned by ib_query_device. 2. QP creation flag IB_QP_CREATE_SIGNATURE_EN Creating a QP that will carry signature handover operations may require some special preparations from the verbs provider. So we add QP creation flag IB_QP_CREATE_SIGNATURE_EN to declare that the created QP may carry out signature handover operations. Expose signature support to verbs layer (no support for now). 3. New send work request IB_WR_REG_SIG_MR Signature handover work request. This WR will define the signature handover properties of the memory/wire domains as well as the domains layout. The purpose of this work request is to bind all the needed information for the signature operation: - data to be transferred: wr->sg_list (ib_sge). * The raw data, pre-registered to a single MR (normally, before signature, this MR would have been used directly for the data transfer) - data protection guards: sig_handover.prot (ib_sge). * The data protection buffer, pre-registered to a single MR, which contains the data integrity guards of the raw data blocks. Note that it may not always exist, only in cases where the user is interested in storing protection guards in memory. - signature operation attributes: sig_handover.sig_attrs. * Tells the HCA how to validate/generate the protection information. Once the work request is executed, the memory region that will describe the signature transaction will be the sig_mr. The application can now go ahead and send the sig_mr.rkey or use the sig_mr.lkey for data transfer. 4. New Verb ib_check_mr_status check_mr_status verb checks the status of the memory region post transaction. The first check that may be used is IB_MR_CHECK_SIG_STATUS, which will indicate if any signature errors are pending for a specific signature-enabled ib_mr. This verb is a lightwight check and is allowed to be taken from interrupt context. An application must call this verb after it is known that the actual data transfer has finished. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 8 +++ include/rdma/ib_verbs.h | 149 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ca8ce7d097e3..92525f855d82 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1437,3 +1437,11 @@ int ib_destroy_flow(struct ib_flow *flow_id) return err; } EXPORT_SYMBOL(ib_destroy_flow); + +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + return mr->device->check_mr_status ? + mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; +} +EXPORT_SYMBOL(ib_check_mr_status); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cb12e6a4e553..82ab5c1e7605 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -122,7 +122,19 @@ enum ib_device_cap_flags { IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), - IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29) + IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), + IB_DEVICE_SIGNATURE_HANDOVER = (1<<30) +}; + +enum ib_signature_prot_cap { + IB_PROT_T10DIF_TYPE_1 = 1, + IB_PROT_T10DIF_TYPE_2 = 1 << 1, + IB_PROT_T10DIF_TYPE_3 = 1 << 2, +}; + +enum ib_signature_guard_cap { + IB_GUARD_T10DIF_CRC = 1, + IB_GUARD_T10DIF_CSUM = 1 << 1, }; enum ib_atomic_cap { @@ -172,6 +184,8 @@ struct ib_device_attr { unsigned int max_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; + int sig_prot_cap; + int sig_guard_cap; }; enum ib_mtu { @@ -477,6 +491,114 @@ struct ib_mr_init_attr { u32 flags; }; +enum ib_signature_type { + IB_SIG_TYPE_T10_DIF, +}; + +/** + * T10-DIF Signature types + * T10-DIF types are defined by SCSI + * specifications. + */ +enum ib_t10_dif_type { + IB_T10DIF_NONE, + IB_T10DIF_TYPE1, + IB_T10DIF_TYPE2, + IB_T10DIF_TYPE3 +}; + +/** + * Signature T10-DIF block-guard types + * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. + * IB_T10DIF_CSUM: Corresponds to IP checksum rules. + */ +enum ib_t10_dif_bg_type { + IB_T10DIF_CRC, + IB_T10DIF_CSUM +}; + +/** + * struct ib_t10_dif_domain - Parameters specific for T10-DIF + * domain. + * @type: T10-DIF type (0|1|2|3) + * @bg_type: T10-DIF block guard type (CRC|CSUM) + * @pi_interval: protection information interval. + * @bg: seed of guard computation. + * @app_tag: application tag of guard block + * @ref_tag: initial guard block reference tag. + * @type3_inc_reftag: T10-DIF type 3 does not state + * about the reference tag, it is the user + * choice to increment it or not. + */ +struct ib_t10_dif_domain { + enum ib_t10_dif_type type; + enum ib_t10_dif_bg_type bg_type; + u16 pi_interval; + u16 bg; + u16 app_tag; + u32 ref_tag; + bool type3_inc_reftag; +}; + +/** + * struct ib_sig_domain - Parameters for signature domain + * @sig_type: specific signauture type + * @sig: union of all signature domain attributes that may + * be used to set domain layout. + */ +struct ib_sig_domain { + enum ib_signature_type sig_type; + union { + struct ib_t10_dif_domain dif; + } sig; +}; + +/** + * struct ib_sig_attrs - Parameters for signature handover operation + * @check_mask: bitmask for signature byte check (8 bytes) + * @mem: memory domain layout desciptor. + * @wire: wire domain layout desciptor. + */ +struct ib_sig_attrs { + u8 check_mask; + struct ib_sig_domain mem; + struct ib_sig_domain wire; +}; + +enum ib_sig_err_type { + IB_SIG_BAD_GUARD, + IB_SIG_BAD_REFTAG, + IB_SIG_BAD_APPTAG, +}; + +/** + * struct ib_sig_err - signature error descriptor + */ +struct ib_sig_err { + enum ib_sig_err_type err_type; + u32 expected; + u32 actual; + u64 sig_err_offset; + u32 key; +}; + +enum ib_mr_status_check { + IB_MR_CHECK_SIG_STATUS = 1, +}; + +/** + * struct ib_mr_status - Memory region status container + * + * @fail_status: Bitmask of MR checks status. For each + * failed check a corresponding status bit is set. + * @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS + * failure. + */ +struct ib_mr_status { + u32 fail_status; + struct ib_sig_err sig_err; +}; + /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. @@ -660,6 +782,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, IB_QP_CREATE_NETIF_QP = 1 << 5, + IB_QP_CREATE_SIGNATURE_EN = 1 << 6, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -824,6 +947,7 @@ enum ib_wr_opcode { IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, IB_WR_BIND_MW, + IB_WR_REG_SIG_MR, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. */ @@ -929,6 +1053,12 @@ struct ib_send_wr { u32 rkey; struct ib_mw_bind_info bind_info; } bind_mw; + struct { + struct ib_sig_attrs *sig_attrs; + struct ib_mr *sig_mr; + int access_flags; + struct ib_sge *prot; + } sig_handover; } wr; u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; @@ -1474,6 +1604,8 @@ struct ib_device { *flow_attr, int domain); int (*destroy_flow)(struct ib_flow *flow_id); + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); struct ib_dma_mapping_ops *dma_ops; @@ -2473,4 +2605,19 @@ static inline int ib_check_mr_access(int flags) return 0; } +/** + * ib_check_mr_status: lightweight check of MR status. + * This routine may provide status checks on a selected + * ib_mr. first use is for signature status check. + * + * @mr: A memory region. + * @check_mask: Bitmask of which checks to perform from + * ib_mr_status_check enumeration. + * @mr_status: The container of relevant status checks. + * failed checks will be indicated in the status bitmask + * and the relevant info shall be in the error item. + */ +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); + #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 3121e3c441b5eccdd15e6c320ec32215b334b9ec Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:06 +0200 Subject: mlx5: Implement create_mr and destroy_mr Support create_mr and destroy_mr verbs. Creating ib_mr may be done for either ib_mr that will register regular page lists like alloc_fast_reg_mr routine, or indirect ib_mrs that can register other (pre-registered) ib_mrs in an indirect manner. In addition user may request signature enable, that will mean that the created ib_mr may be attached with signature attributes (BSF, PSVs). Currently we only allow direct/indirect registration modes. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 + drivers/infiniband/hw/mlx5/mr.c | 111 +++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 61 +++++++++++++++ include/linux/mlx5/device.h | 25 ++++++ include/linux/mlx5/driver.h | 19 +++++ 6 files changed, 222 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aa03e732b6a8..7260a299c6db 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1423,9 +1423,11 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.create_mr = mlx5_ib_create_mr; dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 389e31965773..79c4f14c4d3e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -265,6 +265,7 @@ struct mlx5_ib_mr { enum ib_wc_status status; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; + struct mlx5_core_sig_ctx *sig; }; struct mlx5_ib_fast_reg_page_list { @@ -495,6 +496,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +int mlx5_ib_destroy_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 7c95ca1f0c25..032445c47608 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -992,6 +992,117 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int access_mode, err; + int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + access_mode = MLX5_ACCESS_MODE_MTT; + + if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { + u32 psv_index[2]; + + in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | + MLX5_MKEY_BSF_EN); + in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) { + err = -ENOMEM; + goto err_free_in; + } + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, + 2, psv_index); + if (err) + goto err_free_sig; + + access_mode = MLX5_ACCESS_MODE_KLM; + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + } + + in->seg.flags = MLX5_PERM_UMR_EN | access_mode; + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto err_destroy_psv; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + kfree(in); + + return &mr->ibmr; + +err_destroy_psv: + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + } +err_free_sig: + kfree(mr->sig); +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int err; + + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + kfree(mr->sig); + } + + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + + kfree(mr); + + return err; +} + struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 35e514dc7b7d..bb746bbe73c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -144,3 +144,64 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, return err; } EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); + +int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, + int npsvs, u32 *sig_index) +{ + struct mlx5_allocate_psv_in in; + struct mlx5_allocate_psv_out out; + int i, err; + + if (npsvs > MLX5_MAX_PSVS) + return -EINVAL; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV); + in.npsv_pd = cpu_to_be32((npsvs << 28) | pdn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) { + mlx5_core_err(dev, "cmd exec failed %d\n", err); + return err; + } + + if (out.hdr.status) { + mlx5_core_err(dev, "create_psv bad status %d\n", out.hdr.status); + return mlx5_cmd_status_to_err(&out.hdr); + } + + for (i = 0; i < npsvs; i++) + sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff; + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_psv); + +int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) +{ + struct mlx5_destroy_psv_in in; + struct mlx5_destroy_psv_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.psv_number = cpu_to_be32(psv_num); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) { + mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err); + goto out; + } + + if (out.hdr.status) { + mlx5_core_err(dev, "destroy_psv bad status %d\n", out.hdr.status); + err = mlx5_cmd_status_to_err(&out.hdr); + goto out; + } + +out: + return err; +} +EXPORT_SYMBOL(mlx5_core_destroy_psv); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 817a6fae6d2c..f714fc427765 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -48,6 +48,8 @@ enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, MLX5_PCI_CMD_XPORT = 7, + MLX5_MKEY_BSF_OCTO_SIZE = 4, + MLX5_MAX_PSVS = 4, }; enum { @@ -936,4 +938,27 @@ enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; +struct mlx5_allocate_psv_in { + struct mlx5_inbox_hdr hdr; + __be32 npsv_pd; + __be32 rsvd_psv0; +}; + +struct mlx5_allocate_psv_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + __be32 psv_idx[4]; +}; + +struct mlx5_destroy_psv_in { + struct mlx5_inbox_hdr hdr; + __be32 psv_number; + u8 rsvd[4]; +}; + +struct mlx5_destroy_psv_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + #endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 130bc8d77fa5..e1cb657ccade 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -401,6 +401,22 @@ struct mlx5_eq { struct mlx5_rsc_debug *dbg; }; +struct mlx5_core_psv { + u32 psv_idx; + struct psv_layout { + u32 pd; + u16 syndrome; + u16 reserved; + u16 bg; + u16 app_tag; + u32 ref_tag; + } psv; +}; + +struct mlx5_core_sig_ctx { + struct mlx5_core_psv psv_memory; + struct mlx5_core_psv psv_wire; +}; struct mlx5_core_mr { u64 iova; @@ -746,6 +762,9 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); const char *mlx5_command_str(int command); int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, + int npsvs, u32 *sig_index); +int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); static inline u32 mlx5_mkey_to_idx(u32 mkey) { -- cgit v1.2.3 From e1e66cc26457c2e9412f67618646ec2a441fc409 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:07 +0200 Subject: IB/mlx5: Initialize mlx5_ib_qp signature-related members If user requested signature enable we initialize relevant mlx5_ib_qp members. We mark the qp as sig_enable and we increase the effective SQ size, but still limit the user max_send_wr to original size computed. We also allow the create_qp routine to accept sig_enable create flag. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 12 +++++++++--- include/linux/mlx5/qp.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 79c4f14c4d3e..e438f08899ae 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -189,6 +189,9 @@ struct mlx5_ib_qp { int create_type; u32 pa_lkey; + + /* Store signature errors */ + bool signature_en; }; struct mlx5_ib_cq_buf { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7dfe8a1c84cf..01999f3744fb 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -256,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) + return MLX5_SIG_WQE_SIZE; + else + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, @@ -284,6 +287,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) + qp->signature_en = true; + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { @@ -665,7 +671,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags) + if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index d51eff713549..152756eaa8a3 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -37,6 +37,7 @@ #include #define MLX5_INVALID_LKEY 0x100 +#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) enum mlx5_qp_optpar { MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, -- cgit v1.2.3 From 6e5eadace1da7c57c1f10d55a484bda13c86dfed Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:08 +0200 Subject: IB/mlx5: Break up wqe handling into begin & finish routines As a preliminary step for signature feature which will require posting multiple (3) WQEs for a single WR, we break post_send routine WQE indexing into begin and finish routines. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 97 ++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 01999f3744fb..a029009323ec 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2047,6 +2047,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) } } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + struct ib_send_wr *wr, int *idx, + int *size, int nreq) +{ + int err = 0; + + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + err = -ENOMEM; + return err; + } + + *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + *seg = mlx5_get_send_wqe(qp, *idx); + *ctrl = *seg; + *(uint32_t *)(*seg + 8) = 0; + (*ctrl)->imm = send_ieth(wr); + (*ctrl)->fm_ce_se = qp->sq_signal_bits | + (wr->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + MLX5_WQE_CTRL_SOLICITED : 0); + + *seg += sizeof(**ctrl); + *size = sizeof(**ctrl) / 16; + + return err; +} + +static void finish_wqe(struct mlx5_ib_qp *qp, + struct mlx5_wqe_ctrl_seg *ctrl, + u8 size, unsigned idx, u64 wr_id, + int nreq, u8 fence, u8 next_fence, + u32 mlx5_opcode) +{ + u8 opmod = 0; + + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->fm_ce_se |= fence; + qp->fm_cache = next_fence; + if (unlikely(qp->wq_sig)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; +} + + int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -2060,7 +2113,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int uninitialized_var(size); void *qend = qp->sq.qend; unsigned long flags; - u32 mlx5_opcode; unsigned idx; int err = 0; int inl = 0; @@ -2069,7 +2121,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; u8 next_fence = 0; - u8 opmod = 0; u8 fence; spin_lock_irqsave(&qp->sq.lock, flags); @@ -2082,36 +2133,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + fence = qp->fm_cache; + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - fence = qp->fm_cache; - num_sge = wr->num_sge; - if (unlikely(num_sge > qp->sq.max_gs)) { + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - seg = mlx5_get_send_wqe(qp, idx); - ctrl = seg; - *(uint32_t *)(seg + 8) = 0; - ctrl->imm = send_ieth(wr); - ctrl->fm_ce_se = qp->sq_signal_bits | - (wr->send_flags & IB_SEND_SIGNALED ? - MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - MLX5_WQE_CTRL_SOLICITED : 0); - - seg += sizeof(*ctrl); - size = sizeof(*ctrl) / 16; - switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -2244,22 +2282,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - mlx5_opcode = mlx5_ib_opcode[wr->opcode]; - ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | - mlx5_opcode | - ((u32)opmod << 24)); - ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); - ctrl->fm_ce_se |= get_fence(fence, wr); - qp->fm_cache = next_fence; - if (unlikely(qp->wq_sig)) - ctrl->signature = wq_sig(ctrl); - - qp->sq.wrid[idx] = wr->wr_id; - qp->sq.w_list[idx].opcode = mlx5_opcode; - qp->sq.wqe_head[idx] = qp->sq.head + nreq; - qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + get_fence(fence, wr), next_fence, + mlx5_ib_opcode[wr->opcode]); if (0) dump_wqe(qp, idx, size); } -- cgit v1.2.3 From 2ac45934f8700e0c2a579f6ee85a56c6e9ea89d5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:09 +0200 Subject: IB/mlx5: Remove MTT access mode from umr flags helper function get_umr_flags helper function might be used for types of access modes other than ACCESS_MODE_MTT, such as ACCESS_MODE_KLM. So remove it from helper, and callers will add their own access mode flag. This commit does not add/change functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a029009323ec..1dbadbfc4474 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1832,7 +1832,7 @@ static u8 get_umr_flags(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, @@ -1844,7 +1844,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, return; } - seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags); + seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | + MLX5_ACCESS_MODE_MTT; *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); -- cgit v1.2.3 From 3bcdb17a5e88288ead90be3c107e754a6075a5b0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:10 +0200 Subject: IB/mlx5: Keep mlx5 MRs in a radix tree under device This will be useful when processing signature errors on a specific key. The mlx5 driver will lookup the matching mlx5 memory region structure and mark it as dirty (contains signature errors). Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/mr.c | 24 ++++++++++++++++++++++++ include/linux/mlx5/driver.h | 18 ++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a064f06e0cb8..96a0617f7609 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -446,6 +446,7 @@ int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) mlx5_init_cq_table(dev); mlx5_init_qp_table(dev); mlx5_init_srq_table(dev); + mlx5_init_mr_table(dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index bb746bbe73c6..4cc927649404 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,11 +36,24 @@ #include #include "mlx5_core.h" +void mlx5_init_mr_table(struct mlx5_core_dev *dev) +{ + struct mlx5_mr_table *table = &dev->priv.mr_table; + + rwlock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); +} + +void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) +{ +} + int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, struct mlx5_create_mkey_mbox_out *out) { + struct mlx5_mr_table *table = &dev->priv.mr_table; struct mlx5_create_mkey_mbox_out lout; int err; u8 key; @@ -73,14 +86,21 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(lout.mkey), key, mr->key); + /* connect to MR tree */ + write_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); + write_unlock_irq(&table->lock); + return err; } EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) { + struct mlx5_mr_table *table = &dev->priv.mr_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; + unsigned long flags; int err; memset(&in, 0, sizeof(in)); @@ -95,6 +115,10 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) if (out.hdr.status) return mlx5_cmd_status_to_err(&out.hdr); + write_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + write_unlock_irqrestore(&table->lock, flags); + return err; } EXPORT_SYMBOL(mlx5_core_destroy_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e1cb657ccade..e562e01e59c7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -491,6 +491,13 @@ struct mlx5_srq_table { struct radix_tree_root tree; }; +struct mlx5_mr_table { + /* protect radix tree + */ + rwlock_t lock; + struct radix_tree_root tree; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -520,6 +527,10 @@ struct mlx5_priv { struct mlx5_cq_table cq_table; /* end: cq staff */ + /* start: mr staff */ + struct mlx5_mr_table mr_table; + /* end: mr staff */ + /* start: alloc staff */ struct mutex pgdir_mutex; struct list_head pgdir_list; @@ -667,6 +678,11 @@ static inline void mlx5_vfree(const void *addr) kfree(addr); } +static inline u32 mlx5_base_mkey(const u32 key) +{ + return key & 0xffffff00u; +} + int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev); void mlx5_dev_cleanup(struct mlx5_core_dev *dev); int mlx5_cmd_init(struct mlx5_core_dev *dev); @@ -701,6 +717,8 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, struct mlx5_query_srq_mbox_out *out); int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); +void mlx5_init_mr_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, struct mlx5_create_mkey_mbox_in *in, int inlen, mlx5_cmd_cbk_t callback, void *context, -- cgit v1.2.3 From e6631814fb3ac454fbbf47ea343c2b9508e4e1ba Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:11 +0200 Subject: IB/mlx5: Support IB_WR_REG_SIG_MR This patch implements IB_WR_REG_SIG_MR posted by the user. Baisically this WR involves 3 WQEs in order to prepare and properly register the signature layout: 1. post UMR WR to register the sig_mr in one of two possible ways: * In case the user registered a single MR for data so the UMR data segment consists of: - single klm (data MR) passed by the user - BSF with signature attributes requested by the user. * In case the user registered 2 MRs, one for data and one for protection, the UMR consists of: - strided block format which includes data and protection MRs and their repetitive block format. - BSF with signature attributes requested by the user. 2. post SET_PSV in order to set the memory domain initial signature parameters passed by the user. SET_PSV is not signaled and solicited CQE. 3. post SET_PSV in order to set the wire domain initial signature parameters passed by the user. SET_PSV is not signaled and solicited CQE. * After this compound WR we place a small fence for next WR to come. This patch also introduces some helper functions to set the BSF correctly and determining the signature format selectors. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 422 ++++++++++++++++++++++++++++++++++++++++ include/linux/mlx5/qp.h | 61 ++++++ 2 files changed, 483 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1dbadbfc4474..67e79989b181 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1777,6 +1777,26 @@ static __be64 frwr_mkey_mask(void) return cpu_to_be64(result); } +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr, int li) { @@ -1961,6 +1981,339 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, return 0; } +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static int format_selector(struct ib_sig_attrs *attr, + struct ib_sig_domain *domain, + int *selector) +{ + +#define FORMAT_DIF_NONE 0 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC 13 +#define FORMAT_DIF_CSUM_NO_INC 14 + + switch (domain->sig.dif.type) { + case IB_T10DIF_NONE: + /* No DIF */ + *selector = FORMAT_DIF_NONE; + break; + case IB_T10DIF_TYPE1: /* Fall through */ + case IB_T10DIF_TYPE2: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = FORMAT_DIF_CRC_INC; + break; + case IB_T10DIF_CSUM: + *selector = FORMAT_DIF_CSUM_INC; + break; + default: + return 1; + } + break; + case IB_T10DIF_TYPE3: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CRC_INC : + FORMAT_DIF_CRC_NO_INC; + break; + case IB_T10DIF_CSUM: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CSUM_INC : + FORMAT_DIF_CSUM_NO_INC; + break; + default: + return 1; + } + break; + default: + return 1; + } + + return 0; +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + int ret, selector; + + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_T10_DIF: + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) + return -EINVAL; + + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig.dif.type == wire->sig.dif.type) { + /* Same block structure */ + basic->bsf_size_sbs = 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask = 0xff; + else + basic->wire.copy_byte_mask = 0x3f; + } else + basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); + + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->raw_data_size = cpu_to_be32(data_size); + + ret = format_selector(sig_attrs, mem, &selector); + if (ret) + return -EINVAL; + basic->m_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_memory.psv_idx); + + ret = format_selector(sig_attrs, wire, &selector); + if (ret) + return -EINVAL; + basic->w_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_wire.psv_idx); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct mlx5_bsf *bsf; + u32 data_len = wr->sg_list->length; + u32 data_key = wr->sg_list->lkey; + u64 data_va = wr->sg_list->addr; + int ret; + int wqe_size; + + if (!wr->wr.sig_handover.prot) { + /** + * Source domain doesn't contain signature information + * So need construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information + * So need construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u32 prot_key = wr->wr.sig_handover.prot->lkey; + u64 prot_va = wr->wr.sig_handover.prot->addr; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + + if (prot_key == data_key && prot_va == data_va) { + /** + * The data and protection are interleaved + * in a single memory region + **/ + prot_sentry->va = cpu_to_be64(data_va + block_size); + prot_sentry->stride = cpu_to_be16(block_size + prot_size); + data_sentry->stride = prot_sentry->stride; + } else { + /* The data and protection are two different buffers */ + prot_sentry->va = cpu_to_be64(prot_va); + data_sentry->stride = cpu_to_be16(block_size); + prot_sentry->stride = cpu_to_be16(prot_size); + } + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_send_wr *wr, u32 nelements, + u32 length, u32 pdn) +{ + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + u32 sig_key = sig_mr->rkey; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + MLX5_ACCESS_MODE_KLM; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, u32 nelements) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->klm_octowords = get_klm_octo(nelements); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + + +static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + u32 pdn = get_pd(qp)->pdn; + u32 klm_oct_size; + int region_len, ret; + + if (unlikely(wr->num_sge != 1) || + unlikely(wr->wr.sig_handover.access_flags & + IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = wr->sg_list->length; + if (wr->wr.sig_handover.prot) + region_len += wr->wr.sig_handover.prot->length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + + set_sig_umr_segment(*seg, wr, klm_oct_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + ret = set_sig_data_segment(wr, qp, seg, size); + if (ret) + return ret; + + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + break; + + default: + pr_err("Bad signature type given.\n"); + return 1; + } + + return 0; +} + static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) { @@ -2108,6 +2461,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = &dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf = qp->bf; @@ -2203,6 +2557,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_SIG_MR: + qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; + mr = to_mmr(wr->wr.sig_handover.sig_mr); + + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + err = set_sig_umr_wr(wr, qp, &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_UMR); + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + wr->send_flags &= ~IB_SEND_SIGNALED; + wr->send_flags |= IB_SEND_SOLICITED; + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + mr->sig->psv_memory.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + mr->sig->psv_wire.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + num_sge = 0; + goto skip_psv; + default: break; } @@ -2286,6 +2707,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, mlx5_ib_opcode[wr->opcode]); +skip_psv: if (0) dump_wqe(qp, idx, size); } diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 152756eaa8a3..49af74f90ef9 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -38,6 +38,8 @@ #define MLX5_INVALID_LKEY 0x100 #define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) +#define MLX5_DIF_SIZE 8 +#define MLX5_STRIDE_BLOCK_OP 0x400 enum mlx5_qp_optpar { MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, @@ -152,6 +154,11 @@ enum { MLX5_SND_DBR = 1, }; +enum { + MLX5_FLAGS_INLINE = 1<<7, + MLX5_FLAGS_CHECK_FREE = 1<<5, +}; + struct mlx5_wqe_fmr_seg { __be32 flags; __be32 mem_key; @@ -279,6 +286,60 @@ struct mlx5_wqe_inline_seg { __be32 byte_count; }; +struct mlx5_bsf { + struct mlx5_bsf_basic { + u8 bsf_size_sbs; + u8 check_byte_mask; + union { + u8 copy_byte_mask; + u8 bs_selector; + u8 rsvd_wflags; + } wire; + union { + u8 bs_selector; + u8 rsvd_mflags; + } mem; + __be32 raw_data_size; + __be32 w_bfs_psv; + __be32 m_bfs_psv; + } basic; + struct mlx5_bsf_ext { + __be32 t_init_gen_pro_size; + __be32 rsvd_epi_size; + __be32 w_tfs_psv; + __be32 m_tfs_psv; + } ext; + struct mlx5_bsf_inl { + __be32 w_inl_vld; + __be32 w_rsvd; + __be64 w_block_format; + __be32 m_inl_vld; + __be32 m_rsvd; + __be64 m_block_format; + } inl; +}; + +struct mlx5_klm { + __be32 bcount; + __be32 key; + __be64 va; +}; + +struct mlx5_stride_block_entry { + __be16 stride; + __be16 bcount; + __be32 key; + __be64 va; +}; + +struct mlx5_stride_block_ctrl_seg { + __be32 bcount_per_cycle; + __be32 op; + __be32 repeat_count; + u16 rsvd; + __be16 num_entries; +}; + struct mlx5_core_qp { void (*event) (struct mlx5_core_qp *, int); int qpn; -- cgit v1.2.3 From d5436ba01075ef4629015f7a00914d64ffd795d6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:12 +0200 Subject: IB/mlx5: Collect signature error completion This commit takes care of the generated signature error CQE generated by the HW (if happened). The underlying mlx5 driver will handle signature error completions and will mark the relevant memory region as dirty. Once the consumer gets the completion for the transaction, it must check for signature errors on signature memory region using a new lightweight verb ib_check_mr_status(). In case the user doesn't check for signature error (i.e. doesn't call ib_check_mr_status() with status check IB_MR_CHECK_SIG_STATUS), the memory region cannot be used for another signature operation (REG_SIG_MR work request will fail). Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/cq.c | 62 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 ++++ drivers/infiniband/hw/mlx5/mr.c | 46 ++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 8 +++-- include/linux/mlx5/cq.h | 1 + include/linux/mlx5/device.h | 18 +++++++++++ include/linux/mlx5/driver.h | 4 +++ include/linux/mlx5/qp.h | 5 +++ 9 files changed, 150 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b1705ce6eb88..62bb6b49dc1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) mlx5_buf_free(&dev->mdev, &buf->buf); } +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, + struct ib_sig_err *item) +{ + u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR (1 << 13) +#define APPTAG_ERR (1 << 12) +#define REFTAG_ERR (1 << 11) + + if (syndrome & GUARD_ERR) { + item->err_type = IB_SIG_BAD_GUARD; + item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; + item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; + } else + if (syndrome & REFTAG_ERR) { + item->err_type = IB_SIG_BAD_REFTAG; + item->expected = be32_to_cpu(cqe->expected_reftag); + item->actual = be32_to_cpu(cqe->actual_reftag); + } else + if (syndrome & APPTAG_ERR) { + item->err_type = IB_SIG_BAD_APPTAG; + item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; + item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; + } else { + pr_err("Got signature completion error with bad syndrome %04x\n", + syndrome); + } + + item->sig_err_offset = be64_to_cpu(cqe->err_offset); + item->key = be32_to_cpu(cqe->mkey); +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; + struct mlx5_sig_err_cqe *sig_err_cqe; + struct mlx5_core_mr *mmr; + struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -475,6 +510,33 @@ repoll: } } break; + case MLX5_CQE_SIG_ERR: + sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + + read_lock(&dev->mdev.priv.mr_table.lock); + mmr = __mlx5_mr_lookup(&dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmr)) { + read_unlock(&dev->mdev.priv.mr_table.lock); + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", + cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); + return -EINVAL; + } + + mr = to_mibmr(mmr); + get_sig_err_item(sig_err_cqe, &mr->sig->err_item); + mr->sig->sig_err_exists = true; + mr->sig->sigerr_count++; + + mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", + cq->mcq.cqn, mr->sig->err_item.key, + mr->sig->err_item.err_type, + mr->sig->err_item.sig_err_offset, + mr->sig->err_item.expected, + mr->sig->err_item.actual); + + read_unlock(&dev->mdev.priv.mr_table.lock); + goto repoll; } return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7260a299c6db..ba3ecec7fa65 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1431,6 +1431,7 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e438f08899ae..50541586e0a6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -400,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp, mqp); } +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +{ + return container_of(mmr, struct mlx5_ib_mr, mmr); +} + static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -537,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status); static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 032445c47608..81392b26d078 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1038,6 +1038,11 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, access_mode = MLX5_ACCESS_MODE_KLM; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; } in->seg.flags = MLX5_PERM_UMR_EN | access_mode; @@ -1188,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); } + +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + int ret = 0; + + if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { + pr_err("Invalid status check mask\n"); + ret = -EINVAL; + goto done; + } + + mr_status->fail_status = 0; + if (check_mask & IB_MR_CHECK_SIG_STATUS) { + if (!mmr->sig) { + ret = -EINVAL; + pr_err("signature status check requested on a non-signature enabled MR\n"); + goto done; + } + + mmr->sig->sig_status_checked = true; + if (!mmr->sig->sig_err_exists) + goto done; + + if (ibmr->lkey == mmr->sig->err_item.key) + memcpy(&mr_status->sig_err, &mmr->sig->err_item, + sizeof(mr_status->sig_err)); + else { + mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; + mr_status->sig_err.sig_err_offset = 0; + mr_status->sig_err.key = mmr->sig->err_item.key; + } + + mmr->sig->sig_err_exists = false; + mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; + } + +done: + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 67e79989b181..ae788d27b93f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1784,6 +1784,7 @@ static __be64 sig_mkey_mask(void) result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | MLX5_MKEY_MASK_EN_RINVAL | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_LR | @@ -2219,13 +2220,14 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, { struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | MLX5_ACCESS_MODE_KLM; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); seg->len = cpu_to_be64(length); seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); @@ -2255,7 +2257,8 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, if (unlikely(wr->num_sge != 1) || unlikely(wr->wr.sig_handover.access_flags & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->signature_en)) + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ @@ -2286,6 +2289,7 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, if (ret) return ret; + sig_mr->sig->sig_status_checked = false; return 0; } diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2202c7f72b75..f6b17ac601bd 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -80,6 +80,7 @@ enum { MLX5_CQE_RESP_SEND_IMM = 3, MLX5_CQE_RESP_SEND_INV = 4, MLX5_CQE_RESIZE_CQ = 5, + MLX5_CQE_SIG_ERR = 12, MLX5_CQE_REQ_ERR = 13, MLX5_CQE_RESP_ERR = 14, MLX5_CQE_INVALID = 15, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f714fc427765..407bdb67fd4f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -118,6 +118,7 @@ enum { MLX5_MKEY_MASK_START_ADDR = 1ull << 6, MLX5_MKEY_MASK_PD = 1ull << 7, MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, + MLX5_MKEY_MASK_EN_SIGERR = 1ull << 9, MLX5_MKEY_MASK_BSF_EN = 1ull << 12, MLX5_MKEY_MASK_KEY = 1ull << 13, MLX5_MKEY_MASK_QPN = 1ull << 14, @@ -557,6 +558,23 @@ struct mlx5_cqe64 { u8 op_own; }; +struct mlx5_sig_err_cqe { + u8 rsvd0[16]; + __be32 expected_trans_sig; + __be32 actual_trans_sig; + __be32 expected_reftag; + __be32 actual_reftag; + __be16 syndrome; + u8 rsvd22[2]; + __be32 mkey; + __be64 err_offset; + u8 rsvd30[8]; + __be32 qpn; + u8 rsvd38[2]; + u8 signature; + u8 op_own; +}; + struct mlx5_wqe_srq_next_seg { u8 rsvd0[2]; __be16 next_wqe_index; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e562e01e59c7..93cef6313e72 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -416,6 +416,10 @@ struct mlx5_core_psv { struct mlx5_core_sig_ctx { struct mlx5_core_psv psv_memory; struct mlx5_core_psv psv_wire; + struct ib_sig_err err_item; + bool sig_status_checked; + bool sig_err_exists; + u32 sigerr_count; }; struct mlx5_core_mr { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 49af74f90ef9..f829ad80ff28 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -506,6 +506,11 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } +static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) +{ + return radix_tree_lookup(&dev->priv.mr_table.tree, key); +} + int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_create_qp_mbox_in *in, -- cgit v1.2.3 From 2dea909444c294f55316c068906945ef38980ef3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:13 +0200 Subject: IB/mlx5: Expose support for signature MR feature Currently support only T10-DIF types of signature handover operations (types 1|2|3). Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ba3ecec7fa65..7b9c0782105e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -273,6 +273,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (flags & MLX5_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + /* At this stage no support for signature handover */ + props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | + IB_PROT_T10DIF_TYPE_2 | + IB_PROT_T10DIF_TYPE_3; + props->sig_guard_cap = IB_GUARD_T10DIF_CRC | + IB_GUARD_T10DIF_CSUM; + } props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & 0xffffff; -- cgit v1.2.3 From 4ac79a70033cbb9b7fa2bb2f3770308359af8c12 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 11 Mar 2014 14:17:25 -0500 Subject: RDMA/nes: Fixes for IRD/ORD negotiation with MPA v2 Fixes to enable the negotiation of the supported IRD/ORD sizes with the peer when exchanging MPA v2 messages in connection establishment. Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 119 ++++++++++++++++++++++++++++--------- drivers/infiniband/hw/nes/nes_cm.h | 3 + 2 files changed, 93 insertions(+), 29 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9c9f2f57e960..e852a3a42b9e 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -128,6 +128,7 @@ static void build_mpa_v1(struct nes_cm_node *, void *, u8); static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **); static void print_core(struct nes_cm_core *core); +static void record_ird_ord(struct nes_cm_node *, u16, u16); /* External CM API Interface */ /* instance of function pointers for client API */ @@ -317,7 +318,6 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, } } - if (priv_data_len + mpa_hdr_len != len) { nes_debug(NES_DBG_CM, "The received ietf buffer was not right" " complete (%x + %x != %x)\n", @@ -356,25 +356,57 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, /* send reset */ return -EINVAL; } + if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) + cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD; - if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) { /* responder */ - if (cm_node->ord_size > ird_size) - cm_node->ord_size = ird_size; - } else { - /* initiator */ - if (cm_node->ord_size > ird_size) - cm_node->ord_size = ird_size; - - if (cm_node->ird_size < ord_size) { - /* no resources available */ - /* send terminate message */ - return -EINVAL; + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + /* we are still negotiating */ + if (ord_size > NES_MAX_IRD) { + cm_node->ird_size = NES_MAX_IRD; + } else { + cm_node->ird_size = ord_size; + if (ord_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + cm_node->ird_size = 1; + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n", + __func__, ord_size); + } + } + if (ird_size > NES_MAX_ORD) + cm_node->ord_size = NES_MAX_ORD; + else + cm_node->ord_size = ird_size; + } else { /* initiator */ + if (ord_size > NES_MAX_IRD) { + nes_debug(NES_DBG_CM, + "%s: Unable to support the requested (ord =%u)\n", + __func__, ord_size); + return -EINVAL; + } + cm_node->ird_size = ord_size; + + if (ird_size > NES_MAX_ORD) { + cm_node->ord_size = NES_MAX_ORD; + } else { + if (ird_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n", + __func__, ird_size); + return -EINVAL; + } else { + cm_node->ord_size = ird_size; + } + } } } if (rtr_ctrl_ord & IETF_RDMA0_READ) { cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) { cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; } else { /* Not supported RDMA0 operation */ @@ -514,6 +546,19 @@ static void print_core(struct nes_cm_core *core) nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); } +static void record_ird_ord(struct nes_cm_node *cm_node, + u16 conn_ird, u16 conn_ord) +{ + if (conn_ird > NES_MAX_IRD) + conn_ird = NES_MAX_IRD; + + if (conn_ord > NES_MAX_ORD) + conn_ord = NES_MAX_ORD; + + cm_node->ird_size = conn_ird; + cm_node->ord_size = conn_ord; +} + /** * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame */ @@ -557,11 +602,13 @@ static void build_mpa_v2(struct nes_cm_node *cm_node, mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); /* initialize RTR msg */ - ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? - IETF_NO_IRD_ORD : cm_node->ird_size; - ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? - IETF_NO_IRD_ORD : cm_node->ord_size; - + if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + ctrl_ird = IETF_NO_IRD_ORD; + ctrl_ord = IETF_NO_IRD_ORD; + } else { + ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD; + ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD; + } ctrl_ird |= IETF_PEER_TO_PEER; ctrl_ird |= IETF_FLPDU_ZERO_LEN; @@ -1409,8 +1456,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->mpa_frame_rev = mpa_version; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; - cm_node->ird_size = IETF_NO_IRD_ORD; - cm_node->ord_size = IETF_NO_IRD_ORD; + cm_node->mpav2_ird_ord = 0; + cm_node->ird_size = 0; + cm_node->ord_size = 0; nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", &cm_node->loc_addr, cm_node->loc_port, @@ -3027,11 +3075,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) rem_ref_cm_node(cm_node->cm_core, cm_node); return -ECONNRESET; } - /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; cm_node->nesqp = nesqp; + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); atomic_inc(&cm_accepts); @@ -3054,6 +3102,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (cm_node->mpa_frame_rev == IETF_MPA_V1) mpa_frame_offset = 4; + if (cm_node->mpa_frame_rev == IETF_MPA_V1 || + cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + } + memcpy(mpa_v2_frame->priv_data, conn_param->private_data, conn_param->private_data_len); @@ -3117,7 +3170,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } nesqp->skip_lsmm = 1; - /* Cache the cm_id in the qp */ nesqp->cm_id = cm_id; cm_node->cm_id = cm_id; @@ -3154,7 +3206,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)conn_param->ord); + cpu_to_le32((u32)cm_node->ord_size); memset(&nes_quad, 0, sizeof(nes_quad)); nes_quad.DstIpAdrIndex = @@ -3194,6 +3246,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; + ret = cm_id->event_handler(cm_id, &cm_event); attr.qp_state = IB_QPS_RTS; nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); @@ -3290,14 +3345,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* cache the cm_id in the qp */ nesqp->cm_id = cm_id; - cm_id->provider_data = nesqp; - nesqp->private_data_len = conn_param->private_data_len; - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); - /* space for rdma0 read msg */ - if (conn_param->ord == 0) - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1); nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); nes_debug(NES_DBG_CM, "mpa private data len =%u\n", @@ -3334,6 +3383,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; } + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && + cm_node->ord_size == 0) + cm_node->ord_size = 1; + cm_node->apbvt_set = apbvt_set; nesqp->cm_node = cm_node; cm_node->nesqp = nesqp; @@ -3530,6 +3584,8 @@ static void cm_event_connected(struct nes_cm_event *event) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)cm_node->ord_size); /* Adjust tail for not having a LSMM */ /*nesqp->hwqp.sq_tail = 1;*/ @@ -3742,8 +3798,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event) cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); cm_event.private_data = cm_node->mpa_frame_buf; cm_event.private_data_len = (u8)cm_node->mpa_frame_size; + if (cm_node->mpa_frame_rev == IETF_MPA_V1) { + cm_event.ird = NES_MAX_IRD; + cm_event.ord = NES_MAX_ORD; + } else { cm_event.ird = cm_node->ird_size; cm_event.ord = cm_node->ord_size; + } ret = cm_id->event_handler(cm_id, &cm_event); if (ret) diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 4646e6666087..522c99cd07c4 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -58,6 +58,8 @@ #define IETF_RDMA0_WRITE 0x8000 #define IETF_RDMA0_READ 0x4000 #define IETF_NO_IRD_ORD 0x3FFF +#define NES_MAX_IRD 0x40 +#define NES_MAX_ORD 0x7F enum ietf_mpa_flags { IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ @@ -333,6 +335,7 @@ struct nes_cm_node { enum mpa_frame_version mpa_frame_rev; u16 ird_size; u16 ord_size; + u16 mpav2_ird_ord; u16 mpa_frame_size; struct iw_cm_id *cm_id; -- cgit v1.2.3 From 43adff3979c8f887b364c887e447c443e2f835e8 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 11 Mar 2014 14:20:17 -0500 Subject: RDMA/nes: Fix for passing a valid QP pointer to the user space library Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/infiniband/hw/nes/nes_user.h | 5 +++-- drivers/infiniband/hw/nes/nes_verbs.c | 2 ++ drivers/infiniband/hw/nes/nes_verbs.h | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index e852a3a42b9e..dfa9df484505 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -657,7 +657,7 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a struct nes_qp *nesqp = *nesqp_addr; struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; + u64temp = (unsigned long)nesqp->nesuqp_addr; u64temp |= NES_SW_CONTEXT_ALIGN >> 1; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index 4926de744488..529c421bb15c 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -39,8 +39,8 @@ #include -#define NES_ABI_USERSPACE_VER 1 -#define NES_ABI_KERNEL_VER 1 +#define NES_ABI_USERSPACE_VER 2 +#define NES_ABI_KERNEL_VER 2 /* * Make sure that all structs defined in this file remain laid out so @@ -78,6 +78,7 @@ struct nes_create_cq_req { struct nes_create_qp_req { __u64 user_wqe_buffers; + __u64 user_qp_buffer; }; enum iwnes_memreg_type { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8308e3634767..797d8876d948 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1191,6 +1191,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, if (req.user_wqe_buffers) { virt_wqs = 1; } + if (req.user_qp_buffer) + nesqp->nesuqp_addr = req.user_qp_buffer; if ((ibpd->uobject) && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 0eff7c44d76b..309b31c31ae1 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -184,5 +184,6 @@ struct nes_qp { u8 pau_busy; u8 pau_pending; u8 pau_state; + __u64 nesuqp_addr; }; #endif /* NES_VERBS_H */ -- cgit v1.2.3 From 1c20c81909455f64f2df6107cb099ee5569d9f62 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 20 Feb 2014 11:02:48 -0500 Subject: IB/qib: Fix potential buffer overrun in sending diag packet routine Guard against a potential buffer overrun. Right now the qib driver is protected by the fact that the data structure in question is only 16 bits. Should that ever change the problem will be exposed. There is a similar defect in the ipath driver and this brings the two code paths into sync. Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_diag.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 1686fd4bda87..07f9030a8f10 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct qib_diag_xpkt dp; u32 *tmpbuf = NULL; struct qib_devdata *dd; @@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp, } ppd = &dd->pport[dp.port - 1]; - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - clen = dp.len >> 2; - - if ((plen + 4) > ppd->ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > ppd->ibmaxlen - maxlen_reserve) { ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { qib_devinfo(dd->pcidev, @@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp, */ if (dd->flags & QIB_PIO_FLUSH_WC) { qib_flush_wc(); - qib_pio_copy(piobuf + 2, tmpbuf, clen - 1); + qib_pio_copy(piobuf + 2, tmpbuf, plen - 1); qib_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - qib_pio_copy(piobuf + 2, tmpbuf, clen); + qib_pio_copy(piobuf + 2, tmpbuf, plen); if (dd->flags & QIB_USE_SPCL_TRIG) { u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; -- cgit v1.2.3 From a2cb0eb8a64adb29a99fd864013de957028f36ae Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 20 Feb 2014 11:02:53 -0500 Subject: IB/ipath: Fix potential buffer overrun in sending diag packet routine Guard against a potential buffer overrun. The size to read from the user is passed in, and due to the padding that needs to be taken into account, as well as the place holder for the ICRC it is possible to overflow the 32bit value which would cause more data to be copied from user space than is allocated in the buffer. Cc: Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_diag.c | 66 ++++++++++++-------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 714293b78518..e2f9a51f4a38 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct ipath_diag_pkt odp; struct ipath_diag_xpkt dp; u32 *tmpbuf = NULL; @@ -335,51 +335,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp, u64 val; u32 l_state, lt_state; /* LinkState, LinkTrainingState */ - if (count < sizeof(odp)) { - ret = -EINVAL; - goto bail; - } if (count == sizeof(dp)) { if (copy_from_user(&dp, data, sizeof(dp))) { ret = -EFAULT; goto bail; } - } else if (copy_from_user(&odp, data, sizeof(odp))) { - ret = -EFAULT; + } else if (count == sizeof(odp)) { + if (copy_from_user(&odp, data, sizeof(odp))) { + ret = -EFAULT; + goto bail; + } + } else { + ret = -EINVAL; goto bail; } - /* - * Due to padding/alignment issues (lessened with new struct) - * the old and new structs are the same length. We need to - * disambiguate them, which we can do because odp.len has never - * been less than the total of LRH+BTH+DETH so far, while - * dp.unit (same offset) unit is unlikely to get that high. - * Similarly, dp.data, the pointer to user at the same offset - * as odp.unit, is almost certainly at least one (512byte)page - * "above" NULL. The if-block below can be omitted if compatibility - * between a new driver and older diagnostic code is unimportant. - * compatibility the other direction (new diags, old driver) is - * handled in the diagnostic code, with a warning. - */ - if (dp.unit >= 20 && dp.data < 512) { - /* very probable version mismatch. Fix it up */ - memcpy(&odp, &dp, sizeof(odp)); - /* We got a legacy dp, copy elements to dp */ - dp.unit = odp.unit; - dp.data = odp.data; - dp.len = odp.len; - dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ - } - /* send count must be an exact number of dwords */ if (dp.len & 3) { ret = -EINVAL; goto bail; } - clen = dp.len >> 2; + plen = dp.len >> 2; dd = ipath_lookup(dp.unit); if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || @@ -422,16 +400,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - - if ((plen + 4) > dd->ipath_ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) { ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", - plen - 4, dd->ipath_ibmaxlen); + dp.len, dd->ipath_ibmaxlen); ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " @@ -473,11 +457,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp, */ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1); ipath_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - __iowrite32_copy(piobuf + 2, tmpbuf, clen); + __iowrite32_copy(piobuf + 2, tmpbuf, plen); ipath_flush_wc(); -- cgit v1.2.3 From f8b6c47a44c063062317646683a73371c24c69ee Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 7 Mar 2014 08:32:31 -0500 Subject: IB/qib: Fix debugfs ordering issue with multiple HCAs The debugfs init code was incorrectly called before the idr mechanism is used to get the unit number, so the dd->unit hasn't been initialized. This caused the unit relative directory creation to fail after the first. This patch moves the init for the debugfs stuff until after all of the failures and after the unit number has been determined. A bug in unwind code in qib_alloc_devdata() is also fixed. Cc: Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 24e802f4ea2f..76c3e177164d 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1097,14 +1097,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) int ret; dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } + if (!dd) + return ERR_PTR(-ENOMEM); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_init(&dd->verbs_dev); -#endif + INIT_LIST_HEAD(&dd->list); idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); @@ -1121,11 +1117,6 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_exit(&dd->verbs_dev); -#endif - ib_dealloc_device(&dd->verbs_dev.ibdev); - dd = ERR_PTR(ret); goto bail; } @@ -1139,9 +1130,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) qib_early_err(&pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - -bail: +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret);; } /* -- cgit v1.2.3 From 1ed88dd7d0b361e677b2690f573e5c274bb25c87 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 7 Mar 2014 08:40:49 -0500 Subject: IB/qib: Add percpu counter replacing qib_devdata int_counter This patch replaces the dd->int_counter with a percpu counter. The maintanance of qib_stats.sps_ints and int_counter are combined into the new counter. There are two new functions added to read the counter: - qib_int_counter (for a particular qib_devdata) - qib_sps_ints (for all HCAs) A z_int_counter is added to allow the interrupt detection logic to determine if interrupts have occured since z_int_counter was "reset". Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 10 +++++++-- drivers/infiniband/hw/qib/qib_fs.c | 1 + drivers/infiniband/hw/qib/qib_iba6120.c | 7 +++---- drivers/infiniband/hw/qib/qib_iba7220.c | 8 +++----- drivers/infiniband/hw/qib/qib_iba7322.c | 31 +++++++++------------------- drivers/infiniband/hw/qib/qib_init.c | 36 ++++++++++++++++++++++++++++++++- 6 files changed, 59 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 1946101419a3..f044430ed98e 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -868,8 +868,10 @@ struct qib_devdata { /* last buffer for user use */ u32 lastctxt_piobuf; - /* saturating counter of (non-port-specific) device interrupts */ - u32 int_counter; + /* reset value */ + u64 z_int_counter; + /* percpu intcounter */ + u64 __percpu *int_counter; /* pio bufs allocated per ctxt */ u32 pbufsctxt; @@ -1449,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); +/* interrupts for device */ +u64 qib_int_counter(struct qib_devdata *); +/* interrupt for all devices */ +u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index c61e2a92b3c1..cab610ccd50e 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode, static ssize_t driver_stats_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + qib_stats.sps_ints = qib_sps_ints(); return simple_read_from_buffer(buf, count, ppos, &qib_stats, sizeof qib_stats); } diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 84e593d6007b..b9bea2ebfd4a 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1634,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) @@ -1808,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler re-ordering around actual reset */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 454c2e7668fe..28063d4c225b 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; - + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) unlikely_7220_intr(dd, istat); @@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler reordering around actual reset */ diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index d1bd21319d7d..8441579decd9 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3115,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* handle "errors" of various kinds first, device ahead of port */ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO | @@ -3186,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | @@ -3215,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); @@ -3248,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3277,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3306,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3336,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3723,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) dd->pport->cpspec->ibsymdelta = 0; dd->pport->cpspec->iblnkerrdelta = 0; dd->pport->cpspec->ibmalfdelta = 0; - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); /* * Keep chip from being accessed until we are ready. Use diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 76c3e177164d..6d2629990cb3 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -525,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd) static void verify_interrupt(unsigned long opaque) { struct qib_devdata *dd = (struct qib_devdata *) opaque; + u64 int_counter; if (!dd) return; /* being torn down */ @@ -533,7 +534,8 @@ static void verify_interrupt(unsigned long opaque) * If we don't have a lid or any interrupts, let the user know and * don't bother checking again. */ - if (dd->int_counter == 0) { + int_counter = qib_int_counter(dd) - dd->z_int_counter; + if (int_counter == 0) { if (!dd->f_intr_fallback(dd)) dev_err(&dd->pcidev->dev, "No interrupts detected, not usable.\n"); @@ -1079,9 +1081,34 @@ void qib_free_devdata(struct qib_devdata *dd) #ifdef CONFIG_DEBUG_FS qib_dbg_ibdev_exit(&dd->verbs_dev); #endif + free_percpu(dd->int_counter); ib_dealloc_device(&dd->verbs_dev.ibdev); } +u64 qib_int_counter(struct qib_devdata *dd) +{ + int cpu; + u64 int_counter = 0; + + for_each_possible_cpu(cpu) + int_counter += *per_cpu_ptr(dd->int_counter, cpu); + return int_counter; +} + +u64 qib_sps_ints(void) +{ + unsigned long flags; + struct qib_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + sps_ints += qib_int_counter(dd); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return sps_ints; +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1119,6 +1146,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) "Could not allocate unit ID: error %d\n", -ret); goto bail; } + dd->int_counter = alloc_percpu(u64); + if (!dd->int_counter) { + ret = -ENOMEM; + qib_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); + goto bail; + } if (!qib_cpulist_count) { u32 count = num_online_cpus(); -- cgit v1.2.3 From 7d7632add8dd99f68b21546efff08a5a162de184 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 7 Mar 2014 08:40:55 -0500 Subject: IB/qib: Modify software pma counters to use percpu variables The counters, unicast_xmit, unicast_rcv, multicast_xmit, multicast_rcv are now maintained as percpu variables. The mad code is modified to add a z_ latch so that the percpu counters monotonically increase with appropriate adjustments in the reset, read logic to maintain the z_ latch. This patch also corrects the fact the unitcast_xmit wasn't handled at all for UC and RC QPs. Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 2 +- drivers/infiniband/hw/qib/qib_iba6120.c | 4 ++- drivers/infiniband/hw/qib/qib_iba7220.c | 4 ++- drivers/infiniband/hw/qib/qib_iba7322.c | 6 ++++- drivers/infiniband/hw/qib/qib_init.c | 22 ++++++++++++----- drivers/infiniband/hw/qib/qib_mad.c | 44 +++++++++++++++++++++++++++------ drivers/infiniband/hw/qib/qib_rc.c | 2 +- drivers/infiniband/hw/qib/qib_ruc.c | 1 + drivers/infiniband/hw/qib/qib_ud.c | 6 ++--- drivers/infiniband/hw/qib/qib_verbs.c | 4 +-- drivers/infiniband/hw/qib/qib_verbs.h | 16 +++++++++--- 11 files changed, 83 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index f044430ed98e..c00ae093b6f8 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1186,7 +1186,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); -void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index b9bea2ebfd4a..d68266ac7619 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3265,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd) dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR; ppd->link_width_enabled = IB_WIDTH_4X; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 28063d4c225b..7dec89fdc124 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4059,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) init_waitqueue_head(&cpspec->autoneg_wait); INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 8441579decd9..a7eb32517a04 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6544,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd) } dd->num_pports++; - qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + if (ret) { + dd->num_pports--; + goto bail; + } ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_width_enabled = IB_WIDTH_4X; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 6d2629990cb3..7fbf466704ab 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -233,7 +233,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, /* * Common code for initializing the physical port structure. */ -void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, +int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { int size; @@ -243,6 +243,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, spin_lock_init(&ppd->sdma_lock); spin_lock_init(&ppd->lflags_lock); + spin_lock_init(&ppd->cc_shadow_lock); init_waitqueue_head(&ppd->state_wait); init_timer(&ppd->symerr_clear_timer); @@ -250,8 +251,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, ppd->symerr_clear_timer.data = (unsigned long)ppd; ppd->qib_wq = NULL; - - spin_lock_init(&ppd->cc_shadow_lock); + ppd->ibport_data.pmastats = + alloc_percpu(struct qib_pma_counters); + if (!ppd->ibport_data.pmastats) + return -ENOMEM; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) goto bail; @@ -299,7 +302,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, goto bail_3; } - return; + return 0; bail_3: kfree(ppd->ccti_entries_shadow); @@ -313,7 +316,7 @@ bail_1: bail: /* User is intentionally disabling the congestion control agent */ if (!qib_cc_table_size) - return; + return 0; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { qib_cc_table_size = 0; @@ -324,7 +327,7 @@ bail: qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port); - return; + return 0; } static int init_pioavailregs(struct qib_devdata *dd) @@ -635,6 +638,12 @@ wq_error: return -ENOMEM; } +static void qib_free_pportdata(struct qib_pportdata *ppd) +{ + free_percpu(ppd->ibport_data.pmastats); + ppd->ibport_data.pmastats = NULL; +} + /** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device @@ -922,6 +931,7 @@ static void qib_shutdown_device(struct qib_devdata *dd) destroy_workqueue(ppd->qib_wq); ppd->qib_wq = NULL; } + qib_free_pportdata(ppd); } qib_update_eeprom_log(dd); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index ccb119143d20..edad991d60ed 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, return reply((struct ib_smp *)pmp); } +static void qib_snapshot_pmacounters( + struct qib_ibport *ibp, + struct qib_pma_counters *pmacounters) +{ + struct qib_pma_counters *p; + int cpu; + + memset(pmacounters, 0, sizeof(*pmacounters)); + for_each_possible_cpu(cpu) { + p = per_cpu_ptr(ibp->pmastats, cpu); + pmacounters->n_unicast_xmit += p->n_unicast_xmit; + pmacounters->n_unicast_rcv += p->n_unicast_rcv; + pmacounters->n_multicast_xmit += p->n_multicast_xmit; + pmacounters->n_multicast_rcv += p->n_multicast_rcv; + } +} + static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { @@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; u8 port_select = p->port_select; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, p->port_rcv_data = cpu_to_be64(rwords); p->port_xmit_packets = cpu_to_be64(spkts); p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv); + + qib_snapshot_pmacounters(ibp, &pma); + + p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit + - ibp->z_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv + - ibp->z_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit + - ibp->z_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv + - ibp->z_multicast_rcv); bail: return reply((struct ib_smp *) pmp); @@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); @@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) ibp->z_port_rcv_packets = rpkts; + qib_snapshot_pmacounters(ibp, &pma); + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - ibp->n_unicast_xmit = 0; + ibp->z_unicast_xmit = pma.n_unicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - ibp->n_unicast_rcv = 0; + ibp->z_unicast_rcv = pma.n_unicast_rcv; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - ibp->n_multicast_xmit = 0; + ibp->z_multicast_xmit = pma.n_multicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - ibp->n_multicast_rcv = 0; + ibp->z_multicast_rcv = pma.n_multicast_rcv; return pma_get_portcounters_ext(pmp, ibdev, port); } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3ab341320ead..2f2501890c4e 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp) qib_flush_wc(); qib_sendbuf_done(dd, pbufn); - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); goto done; queue_ack: diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 357b6cfcd46c..4c07a8b34ffe 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } /** diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 3ad651c3356c..aaf7039f8ed2 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -280,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp) ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) - ibp->n_multicast_xmit++; + this_cpu_inc(ibp->pmastats->n_multicast_xmit); else - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } else { - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { /* diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 092b0bb1bb78..1b00734fb80d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -662,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; - ibp->n_multicast_rcv++; + this_cpu_inc(ibp->pmastats->n_multicast_rcv); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); /* @@ -689,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) rcd->lookaside_qpn = qp_num; } else qp = rcd->lookaside_qp; - ibp->n_unicast_rcv++; + this_cpu_inc(ibp->pmastats->n_unicast_rcv); qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); } return; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a01c7d2cf541..bfc8948fdd35 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -664,6 +664,13 @@ struct qib_opcode_stats_perctx { struct qib_opcode_stats stats[128]; }; +struct qib_pma_counters { + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ +}; + struct qib_ibport { struct qib_qp __rcu *qp0; struct qib_qp __rcu *qp1; @@ -680,10 +687,11 @@ struct qib_ibport { __be64 mkey; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ u64 tid; /* TID for traps */ - u64 n_unicast_xmit; /* total unicast packets sent */ - u64 n_unicast_rcv; /* total unicast packets received */ - u64 n_multicast_xmit; /* total multicast packets sent */ - u64 n_multicast_rcv; /* total multicast packets received */ + struct qib_pma_counters __percpu *pmastats; + u64 z_unicast_xmit; /* starting count for PMA */ + u64 z_unicast_rcv; /* starting count for PMA */ + u64 z_multicast_xmit; /* starting count for PMA */ + u64 z_multicast_rcv; /* starting count for PMA */ u64 z_symbol_error_counter; /* starting count for PMA */ u64 z_link_error_recovery_counter; /* starting count for PMA */ u64 z_link_downed_counter; /* starting count for PMA */ -- cgit v1.2.3 From 37a967651caf99dd267017023737bd442f5acb3d Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:28 +0100 Subject: IB/qib: add missing braces in do_qib_user_sdma_queue_create() Commit c804f07248895ff9c moved qib_assign_ctxt() to do_qib_user_sdma_queue_create() but dropped the braces around the statements. This was spotted by coccicheck (coccinelle/spatch): $ make C=2 CHECK=scripts/coccicheck drivers/infiniband/hw/qib/ CHECK drivers/infiniband/hw/qib/qib_file_ops.c drivers/infiniband/hw/qib/qib_file_ops.c:1583:2-23: code aligned with following code on line 1587 This patch adds braces back. Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Mike Marciniszyn Cc: infinipath@intel.com Cc: Julia Lawall Cc: cocci@systeme.lip6.fr Cc: stable@vger.kernel.org Signed-off-by: Yann Droneaud Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_file_ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 275f247f9fca..2023cd61b897 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) struct qib_ctxtdata *rcd = fd->rcd; struct qib_devdata *dd = rcd->dd; - if (dd->flags & QIB_HAS_SEND_DMA) + if (dd->flags & QIB_HAS_SEND_DMA) { fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, dd->unit, @@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) fd->subctxt); if (!fd->pq) return -ENOMEM; + } return 0; } -- cgit v1.2.3 From 8572de9732b6d1fd211873ffab8c60b3c1745ee8 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:29 +0100 Subject: IB/qib: fixup indentation in qib_ib_rcv() Commit af061a644a0e4d4778 add some code in qib_ib_rcv() which trigger a warning from coccicheck (coccinelle/spatch): $ make C=2 CHECK=scripts/coccicheck drivers/infiniband/hw/qib/ CHECK drivers/infiniband/hw/qib/qib_verbs.c drivers/infiniband/hw/qib/qib_verbs.c:679:5-32: code aligned with following code on line 681 CC [M] drivers/infiniband/hw/qib/qib_verbs.o In fact, according to similar code in qib_kreceive(), qib_ib_rcv() code is correct but improperly indented. This patch fix indentation for the misaligned portion. Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Mike Marciniszyn Cc: infinipath@intel.com Cc: Julia Lawall Cc: cocci@systeme.lip6.fr Signed-off-by: Yann Droneaud Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 1b00734fb80d..9bcfbd842980 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -678,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) &rcd->lookaside_qp->refcount)) wake_up( &rcd->lookaside_qp->wait); - rcd->lookaside_qp = NULL; - } + rcd->lookaside_qp = NULL; + } } if (!rcd->lookaside_qp) { qp = qib_lookup_qpn(ibp, qp_num); -- cgit v1.2.3 From 06064a103f6bd5b409ffed6248983270c0681c21 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 17 Mar 2014 14:07:16 -0400 Subject: IB/qib: Fix memory leak of recv context when driver fails to initialize. In qib_create_ctxts() we allocate an array to hold recv contexts. Then attempt to create data for those recv contexts. If that call to qib_create_ctxtdata() fails then an error is returned but the previously allocated memory is not freed. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 7fbf466704ab..5b7aeb224a30 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -130,7 +130,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; - int ret; int local_node_id = pcibus_to_node(dd->pcidev->bus); if (local_node_id < 0) @@ -145,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd) if (!dd->rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata array, failing\n"); - ret = -ENOMEM; - goto done; + return -ENOMEM; } /* create (one or more) kctxt */ @@ -163,15 +161,14 @@ int qib_create_ctxts(struct qib_devdata *dd) if (!rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); - ret = -ENOMEM; - goto done; + kfree(dd->rcd); + dd->rcd = NULL; + return -ENOMEM; } rcd->pkeys[0] = QIB_DEFAULT_P_KEY; rcd->seq_cnt = 1; } - ret = 0; -done: - return ret; + return 0; } /* -- cgit v1.2.3 From 9d194d1025f463392feafa26ff8c2d8247f71be1 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:27 +0100 Subject: IB/nes: Return an error on ib_copy_from_udata() failure instead of NULL In case of error while accessing to userspace memory, function nes_create_qp() returns NULL instead of an error code wrapped through ERR_PTR(). But NULL is not expected by ib_uverbs_create_qp(), as it check for error with IS_ERR(). As page 0 is likely not mapped, it is going to trigger an Oops when the kernel will try to dereference NULL pointer to access to struct ib_qp's fields. In some rare cases, page 0 could be mapped by userspace, which could turn this bug to a vulnerability that could be exploited: the function pointers in struct ib_device will be under userspace total control. This was caught when using spatch (aka. coccinelle) to rewrite calls to ib_copy_{from,to}_udata(). Link: https://www.gitorious.org/opteya/ib-hw-nes-create-qp-null Link: https://www.gitorious.org/opteya/coccib/source/75ebf2c1033c64c1d81df13e4ae44ee99c989eba:ib_copy_udata.cocci Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8308e3634767..eb624611f94b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1186,7 +1186,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); - return NULL; + return ERR_PTR(-EFAULT); } if (req.user_wqe_buffers) { virt_wqs = 1; -- cgit v1.2.3 From 970918b32b030e9b3966e5ccb5f4a5a5b515a5b1 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 18 Feb 2014 09:54:27 -0300 Subject: IB/usnic: Remove '0x' when using %pa format %pa format already prints in hexadecimal format, so remove the '0x' annotation to avoid a double '0x0x' pattern. Signed-off-by: Fabio Estevam Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 16755cdab2c0..801a1d6937e4 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -286,7 +286,7 @@ iter_chunk: err = iommu_map(pd->domain, va_start, pa_start, size, flags); if (err) { - usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n", + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); goto err_out; } -- cgit v1.2.3 From db498827ff62611c12c03f6d33bcc532d9fb497e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Feb 2014 14:09:37 +0300 Subject: IB/qib: Remove duplicate check in get_a_ctxt() We already know "pusable" is non-zero, no need to check again. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 2023cd61b897..b15e34eeef68 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1459,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, cused++; else cfree++; - if (pusable && cfree && cused < inuse) { + if (cfree && cused < inuse) { udd = dd; inuse = cused; } -- cgit v1.2.3 From 349850f0a9188cb87b4ef6c01c057cb42cca478e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Feb 2014 15:48:23 +0300 Subject: RDMA/nes: Clean up a condition We don't need to test "ret" twice and also the white space is messed up. Signed-off-by: Dan Carpenter Reviewed-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 797d8876d948..9722eeeaadaa 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3136,9 +3136,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - if ((!ret) || - ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) && - (ret))) { + if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { if (dont_wait) { if (nesqp->cm_id && nesqp->hw_tcp_state != 0) { nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d)," -- cgit v1.2.3 From bc1b04ab34a1485339571242cb0fbad823835685 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 19 Feb 2014 15:05:16 -0500 Subject: RDMA/ocrdma: Fix compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/infiniband/hw/ocrdma/ocrdma_verbs.c: In function ‘_ocrdma_modify_qp’: drivers/infiniband/hw/ocrdma/ocrdma_verbs.c:1299:31: error: ‘old_qps’ may be used uninitialized in this function [-Werror=maybe-uninitialized] status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps); ocrdma_mbx_modify_qp() (and subsequent calls) doesn't appear to use old_qps so it doesn't need to be passed on. Removing the variable results in the warning going away. Signed-off-by: Prarit Bhargava Acked-by: Devesh Sharma (Devesh.sharma@emulex.com) Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 8 +++----- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 3 +-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 1664d648cbfc..ac3fbf2db00a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2127,8 +2127,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, static int ocrdma_set_qp_params(struct ocrdma_qp *qp, struct ocrdma_modify_qp *cmd, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps) + struct ib_qp_attr *attrs, int attr_mask) { int status = 0; @@ -2233,8 +2232,7 @@ pmtu_err: } int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps) + struct ib_qp_attr *attrs, int attr_mask) { int status = -ENOMEM; struct ocrdma_modify_qp *cmd; @@ -2257,7 +2255,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, OCRDMA_QP_PARAMS_STATE_MASK; } - status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps); + status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask); if (status) goto mbx_err; status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 82fe332ae6c6..db3d55f368bf 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -112,8 +112,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset, u16 *dpp_credit_lmt); int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps); + struct ib_qp_attr *attrs, int attr_mask); int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *, struct ocrdma_qp_params *param); int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e0cc201be41a..f1108ebae83a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1296,7 +1296,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, */ if (status < 0) return status; - status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps); + status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR) ocrdma_flush_rq_db(qp); -- cgit v1.2.3 From 0e9855dbf43a9cd31df5f5f61a18e031ac1c4a82 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 10 Mar 2014 10:33:05 +0100 Subject: IB/mlx4: Fix a sparse endianness warning Fix the following warning for the mlx4 driver: $ make M=drivers/infiniband C=2 CF=-D__CHECK_ENDIAN__ drivers/infiniband/hw/mlx4/qp.c:1885:31: warning: restricted __be16 degrades to integer Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d8f4d1fe8494..74993250523e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1882,7 +1882,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return err; } - if (ah->av.eth.vlan != 0xffff) { + if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; is_vlan = 1; } -- cgit v1.2.3 From db523b8de13545488b6ff6c952b4527596f3c16a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 23 Jan 2014 12:31:28 +0200 Subject: IB/iser: Suppress completions for fast registration work requests In case iSER uses fast registration method, it should not request for successful completions on fast registration nor local invalidate requests. We color wr_id with ISER_FRWR_LI_WRID in order to correctly consume error completions. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ drivers/infiniband/ulp/iser/iser_memory.c | 15 ++++----------- drivers/infiniband/ulp/iser/iser_verbs.c | 14 ++++++-------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 67914027c614..e1a01c6e6e12 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -138,6 +138,8 @@ #define ISER_WSV 0x08 #define ISER_RSV 0x04 +#define ISER_FRWR_LI_WRID 0xffffffffffffffffULL + struct iser_hdr { u8 flags; u8 rsvd[3]; diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 1ce0c97d2ccb..f7701795cef2 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -457,8 +457,8 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, if (!desc->valid) { memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = ISER_FRWR_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; wr = &inv_wr; /* Bump the key */ @@ -468,8 +468,8 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.wr_id = ISER_FRWR_LI_WRID; fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; fastreg_wr.wr.fast_reg.page_list_len = page_list_len; @@ -480,20 +480,13 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - if (!wr) { + if (!wr) wr = &fastreg_wr; - atomic_inc(&ib_conn->post_send_buf_count); - } else { + else wr->next = &fastreg_wr; - atomic_add(2, &ib_conn->post_send_buf_count); - } ret = ib_post_send(ib_conn->qp, wr, &bad_wr); if (ret) { - if (bad_wr->next) - atomic_sub(2, &ib_conn->post_send_buf_count); - else - atomic_dec(&ib_conn->post_send_buf_count); iser_err("fast registration failed, ret:%d\n", ret); return ret; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ca37edef2791..7bdb811ad5b1 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -993,18 +993,16 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_SEND) iser_snd_completion(tx_desc, ib_conn); - else if (wc.opcode == IB_WC_LOCAL_INV || - wc.opcode == IB_WC_FAST_REG_MR) { - atomic_dec(&ib_conn->post_send_buf_count); - continue; - } else + else iser_err("expected opcode %d got %d\n", IB_WC_SEND, wc.opcode); } else { iser_err("tx id %llx status %d vend_err %x\n", - wc.wr_id, wc.status, wc.vendor_err); - atomic_dec(&ib_conn->post_send_buf_count); - iser_handle_comp_error(tx_desc, ib_conn); + wc.wr_id, wc.status, wc.vendor_err); + if (wc.wr_id != ISER_FRWR_LI_WRID) { + atomic_dec(&ib_conn->post_send_buf_count); + iser_handle_comp_error(tx_desc, ib_conn); + } } completed_tx++; } -- cgit v1.2.3 From 7306b8fad467c4c3c1e3fc68b237427cac1533a7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:39 +0200 Subject: IB/iser: Avoid FRWR notation, use fastreg instead FRWR stands for "fast registration work request". We want to avoid calling the fastreg pool with that name, instead we name it fastreg which stands for "fast registration". This pool will include more elements in the future, so it is a good idea to generalize the name. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 20 ++++---- drivers/infiniband/ulp/iser/iser_memory.c | 28 +++++------ drivers/infiniband/ulp/iser/iser_verbs.c | 84 +++++++++++++++---------------- 3 files changed, 67 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index e1a01c6e6e12..ca161dfeee48 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -138,7 +138,7 @@ #define ISER_WSV 0x08 #define ISER_RSV 0x04 -#define ISER_FRWR_LI_WRID 0xffffffffffffffffULL +#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL struct iser_hdr { u8 flags; @@ -312,6 +312,8 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; + + /* Connection memory registration pool */ union { struct { struct ib_fmr_pool *pool; /* pool of IB FMRs */ @@ -321,8 +323,8 @@ struct iser_conn { struct { struct list_head pool; int pool_size; - } frwr; - } fastreg; + } fastreg; + }; }; struct iscsi_iser_conn { @@ -408,8 +410,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -422,8 +424,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -440,6 +442,6 @@ int iser_initialize_task_headers(struct iscsi_task *task, int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_frwr_pool(struct iser_conn *ib_conn); +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index f7701795cef2..6e9b7bcbc562 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -422,8 +422,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec, + iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec, ®d_buf->reg); if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); @@ -431,12 +431,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, mem->dma_nents, ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - ib_conn->fastreg.fmr.page_vec->data_size, - ib_conn->fastreg.fmr.page_vec->length, - ib_conn->fastreg.fmr.page_vec->offset); - for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++) + ib_conn->fmr.page_vec->data_size, + ib_conn->fmr.page_vec->length, + ib_conn->fmr.page_vec->offset); + for (i = 0; i < ib_conn->fmr.page_vec->length; i++) iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]); + (unsigned long long) ib_conn->fmr.page_vec->pages[i]); } if (err) return err; @@ -457,7 +457,7 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, if (!desc->valid) { memset(&inv_wr, 0, sizeof(inv_wr)); - inv_wr.wr_id = ISER_FRWR_LI_WRID; + inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; wr = &inv_wr; @@ -468,7 +468,7 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr_id = ISER_FRWR_LI_WRID; + fastreg_wr.wr_id = ISER_FASTREG_LI_WRID; fastreg_wr.opcode = IB_WR_FAST_REG_MR; fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; @@ -503,13 +503,13 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, } /** - * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA, + * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA, * using Fast Registration WR (if possible) obtaining rkey and va * * returns 0 on success, errno code on failure */ -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; @@ -544,7 +544,7 @@ int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, regd_buf->reg.is_mr = 0; } else { spin_lock_irqsave(&ib_conn->lock, flags); - desc = list_first_entry(&ib_conn->fastreg.frwr.pool, + desc = list_first_entry(&ib_conn->fastreg.pool, struct fast_reg_descriptor, list); list_del(&desc->list); spin_unlock_irqrestore(&ib_conn->lock, flags); @@ -567,7 +567,7 @@ int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, return 0; err_reg: spin_lock_irqsave(&ib_conn->lock, flags); - list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + list_add_tail(&desc->list, &ib_conn->fastreg.pool); spin_unlock_irqrestore(&ib_conn->lock, flags); return err; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 7bdb811ad5b1..dc5a0b49cfbe 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -94,13 +94,13 @@ static int iser_create_device_ib_res(struct iser_device *device) device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - iser_info("FRWR supported, using FRWR for registration\n"); - device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; - device->iser_free_rdma_reg_res = iser_free_frwr_pool; - device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; - device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; + iser_info("FastReg supported, using FastReg for registration\n"); + device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool; + device->iser_free_rdma_reg_res = iser_free_fastreg_pool; + device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg; + device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg; } else { - iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); + iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); goto dev_attr_err; } @@ -221,13 +221,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) struct ib_fmr_pool_param params; int ret = -ENOMEM; - ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + - (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), - GFP_KERNEL); - if (!ib_conn->fastreg.fmr.page_vec) + ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), + GFP_KERNEL); + if (!ib_conn->fmr.page_vec) return ret; - ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); + ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1); params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * @@ -243,16 +243,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); - if (!IS_ERR(ib_conn->fastreg.fmr.pool)) + ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); + if (!IS_ERR(ib_conn->fmr.pool)) return 0; /* no FMR => no need for page_vec */ - kfree(ib_conn->fastreg.fmr.page_vec); - ib_conn->fastreg.fmr.page_vec = NULL; + kfree(ib_conn->fmr.page_vec); + ib_conn->fmr.page_vec = NULL; - ret = PTR_ERR(ib_conn->fastreg.fmr.pool); - ib_conn->fastreg.fmr.pool = NULL; + ret = PTR_ERR(ib_conn->fmr.pool); + ib_conn->fmr.pool = NULL; if (ret != -ENOSYS) { iser_err("FMR allocation failed, err %d\n", ret); return ret; @@ -268,30 +268,30 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) void iser_free_fmr_pool(struct iser_conn *ib_conn) { iser_info("freeing conn %p fmr pool %p\n", - ib_conn, ib_conn->fastreg.fmr.pool); + ib_conn, ib_conn->fmr.pool); - if (ib_conn->fastreg.fmr.pool != NULL) - ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); + if (ib_conn->fmr.pool != NULL) + ib_destroy_fmr_pool(ib_conn->fmr.pool); - ib_conn->fastreg.fmr.pool = NULL; + ib_conn->fmr.pool = NULL; - kfree(ib_conn->fastreg.fmr.page_vec); - ib_conn->fastreg.fmr.page_vec = NULL; + kfree(ib_conn->fmr.page_vec); + ib_conn->fmr.page_vec = NULL; } /** - * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * iser_create_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. * returns 0 on success, or errno code on failure */ -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) { struct iser_device *device = ib_conn->device; struct fast_reg_descriptor *desc; int i, ret; - INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); - ib_conn->fastreg.frwr.pool_size = 0; + INIT_LIST_HEAD(&ib_conn->fastreg.pool); + ib_conn->fastreg.pool_size = 0; for (i = 0; i < cmds_max; i++) { desc = kmalloc(sizeof(*desc), GFP_KERNEL); if (!desc) { @@ -316,8 +316,8 @@ int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) goto fast_reg_mr_failure; } desc->valid = true; - list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); - ib_conn->fastreg.frwr.pool_size++; + list_add_tail(&desc->list, &ib_conn->fastreg.pool); + ib_conn->fastreg.pool_size++; } return 0; @@ -327,24 +327,24 @@ fast_reg_mr_failure: fast_reg_page_failure: kfree(desc); err: - iser_free_frwr_pool(ib_conn); + iser_free_fastreg_pool(ib_conn); return ret; } /** - * iser_free_frwr_pool - releases the pool of fast_reg descriptors + * iser_free_fastreg_pool - releases the pool of fast_reg descriptors */ -void iser_free_frwr_pool(struct iser_conn *ib_conn) +void iser_free_fastreg_pool(struct iser_conn *ib_conn) { struct fast_reg_descriptor *desc, *tmp; int i = 0; - if (list_empty(&ib_conn->fastreg.frwr.pool)) + if (list_empty(&ib_conn->fastreg.pool)) return; - iser_info("freeing conn %p frwr pool\n", ib_conn); + iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { + list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { list_del(&desc->list); ib_free_fast_reg_page_list(desc->data_frpl); ib_dereg_mr(desc->data_mr); @@ -352,9 +352,9 @@ void iser_free_frwr_pool(struct iser_conn *ib_conn) ++i; } - if (i < ib_conn->fastreg.frwr.pool_size) + if (i < ib_conn->fastreg.pool_size) iser_warn("pool still has %d regions registered\n", - ib_conn->fastreg.frwr.pool_size - i); + ib_conn->fastreg.pool_size - i); } /** @@ -801,7 +801,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, page_list = page_vec->pages; io_addr = page_list[0]; - mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, + mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool, page_list, page_vec->length, io_addr); @@ -855,8 +855,8 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, reg->mem_h = NULL; } -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; @@ -868,7 +868,7 @@ void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, reg->mem_h = NULL; reg->is_mr = 0; spin_lock_bh(&ib_conn->lock); - list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + list_add_tail(&desc->list, &ib_conn->fastreg.pool); spin_unlock_bh(&ib_conn->lock); } @@ -999,7 +999,7 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) } else { iser_err("tx id %llx status %d vend_err %x\n", wc.wr_id, wc.status, wc.vendor_err); - if (wc.wr_id != ISER_FRWR_LI_WRID) { + if (wc.wr_id != ISER_FASTREG_LI_WRID) { atomic_dec(&ib_conn->post_send_buf_count); iser_handle_comp_error(tx_desc, ib_conn); } -- cgit v1.2.3 From d11ec4ecf022f49df33a784f0cf445638573f577 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:40 +0200 Subject: IB/iser: Push the decision what memory key to use into fast_reg_mr routine This is a preparation step for T10-PI offload support. We prefer to push the desicion of which mkey to use (global or fastreg) to iser_fast_reg_mr. We choose to do this since in T10-PI we may need to register for protection buffers and in this case we wish to simplify iser_fast_reg_mr instead of repeating the logic of which key to use. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 101 +++++++++++++++++------------- 1 file changed, 59 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 6e9b7bcbc562..d25587e2f296 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -444,16 +444,40 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, - struct iser_conn *ib_conn, +static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_regd_buf *regd_buf, - u32 offset, unsigned int data_size, - unsigned int page_list_len) + struct iser_data_buf *mem, + struct ib_sge *sge) { + struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; + struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; + struct ib_device *ibdev = device->ib_device; struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; u8 key; - int ret; + int ret, offset, size, plen; + + /* if there a single dma entry, dma mr suffices */ + if (mem->dma_nents == 1) { + struct scatterlist *sg = (struct scatterlist *)mem->buf; + + sge->lkey = device->mr->lkey; + sge->addr = ib_sg_dma_address(ibdev, &sg[0]); + sge->length = ib_sg_dma_len(ibdev, &sg[0]); + + iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n", + sge->lkey, sge->addr, sge->length); + return 0; + } + + plen = iser_sg_to_page_vec(mem, device->ib_device, + desc->data_frpl->page_list, + &offset, &size); + if (plen * SIZE_4K < size) { + iser_err("fast reg page_list too short to hold this SG\n"); + return -EINVAL; + } if (!desc->valid) { memset(&inv_wr, 0, sizeof(inv_wr)); @@ -472,9 +496,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, fastreg_wr.opcode = IB_WR_FAST_REG_MR; fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; - fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.page_list_len = plen; fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; - fastreg_wr.wr.fast_reg.length = data_size; + fastreg_wr.wr.fast_reg.length = size; fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | @@ -492,12 +516,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, } desc->valid = false; - regd_buf->reg.mem_h = desc; - regd_buf->reg.lkey = desc->data_mr->lkey; - regd_buf->reg.rkey = desc->data_mr->rkey; - regd_buf->reg.va = desc->data_frpl->page_list[0] + offset; - regd_buf->reg.len = data_size; - regd_buf->reg.is_mr = 1; + sge->lkey = desc->data_mr->lkey; + sge->addr = desc->data_frpl->page_list[0] + offset; + sge->length = size; return ret; } @@ -516,11 +537,10 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; - struct fast_reg_descriptor *desc; - unsigned int data_size, page_list_len; + struct fast_reg_descriptor *desc = NULL; + struct ib_sge data_sge; int err, aligned_len; unsigned long flags; - u32 offset; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { @@ -533,41 +553,38 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, mem = &iser_task->data_copy[cmd_dir]; } - /* if there a single dma entry, dma mr suffices */ - if (mem->dma_nents == 1) { - struct scatterlist *sg = (struct scatterlist *)mem->buf; - - regd_buf->reg.lkey = device->mr->lkey; - regd_buf->reg.rkey = device->mr->rkey; - regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); - regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); - regd_buf->reg.is_mr = 0; - } else { + if (mem->dma_nents != 1) { spin_lock_irqsave(&ib_conn->lock, flags); desc = list_first_entry(&ib_conn->fastreg.pool, struct fast_reg_descriptor, list); list_del(&desc->list); spin_unlock_irqrestore(&ib_conn->lock, flags); - page_list_len = iser_sg_to_page_vec(mem, device->ib_device, - desc->data_frpl->page_list, - &offset, &data_size); - - if (page_list_len * SIZE_4K < data_size) { - iser_err("fast reg page_list too short to hold this SG\n"); - err = -EINVAL; - goto err_reg; - } + regd_buf->reg.mem_h = desc; + } - err = iser_fast_reg_mr(desc, ib_conn, regd_buf, - offset, data_size, page_list_len); - if (err) - goto err_reg; + err = iser_fast_reg_mr(iser_task, regd_buf, mem, &data_sge); + if (err) + goto err_reg; + + if (desc) { + regd_buf->reg.rkey = desc->data_mr->rkey; + regd_buf->reg.is_mr = 1; + } else { + regd_buf->reg.rkey = device->mr->rkey; + regd_buf->reg.is_mr = 0; } + regd_buf->reg.lkey = data_sge.lkey; + regd_buf->reg.va = data_sge.addr; + regd_buf->reg.len = data_sge.length; + return 0; err_reg: - spin_lock_irqsave(&ib_conn->lock, flags); - list_add_tail(&desc->list, &ib_conn->fastreg.pool); - spin_unlock_irqrestore(&ib_conn->lock, flags); + if (desc) { + spin_lock_irqsave(&ib_conn->lock, flags); + list_add_tail(&desc->list, &ib_conn->fastreg.pool); + spin_unlock_irqrestore(&ib_conn->lock, flags); + } + return err; } -- cgit v1.2.3 From 310b347c6017ca5f00fa1e574c2d9c5b1088a786 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:41 +0200 Subject: IB/iser: Move fast_reg_descriptor initialization to a function fastreg descriptor will include protection information context. In order to place the logic in one place we introduce iser_create_fr_desc function. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 58 ++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index dc5a0b49cfbe..9569e402d368 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -279,6 +279,39 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn) ib_conn->fmr.page_vec = NULL; } +static int +iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, + struct fast_reg_descriptor *desc) +{ + int ret; + + desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_frpl)) { + ret = PTR_ERR(desc->data_frpl); + iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", + ret); + return PTR_ERR(desc->data_frpl); + } + + desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_mr)) { + ret = PTR_ERR(desc->data_mr); + iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); + goto fast_reg_mr_failure; + } + iser_info("Create fr_desc %p page_list %p\n", + desc, desc->data_frpl->page_list); + desc->valid = true; + + return 0; + +fast_reg_mr_failure: + ib_free_fast_reg_page_list(desc->data_frpl); + + return ret; +} + /** * iser_create_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. @@ -300,32 +333,21 @@ int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) goto err; } - desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc->data_frpl)) { - ret = PTR_ERR(desc->data_frpl); - iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); - goto fast_reg_page_failure; + ret = iser_create_fastreg_desc(device->ib_device, + device->pd, desc); + if (ret) { + iser_err("Failed to create fastreg descriptor err=%d\n", + ret); + kfree(desc); + goto err; } - desc->data_mr = ib_alloc_fast_reg_mr(device->pd, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc->data_mr)) { - ret = PTR_ERR(desc->data_mr); - iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); - goto fast_reg_mr_failure; - } - desc->valid = true; list_add_tail(&desc->list, &ib_conn->fastreg.pool); ib_conn->fastreg.pool_size++; } return 0; -fast_reg_mr_failure: - ib_free_fast_reg_page_list(desc->data_frpl); -fast_reg_page_failure: - kfree(desc); err: iser_free_fastreg_pool(ib_conn); return ret; -- cgit v1.2.3 From 65198d6b843bf43650781f71caac1266d6b407cb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:42 +0200 Subject: IB/iser: Keep IB device attributes under iser_device For T10-PI offload support, we will need to know the device signature offload capability upon every connection establishment. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 1 + drivers/infiniband/ulp/iser/iser_verbs.c | 18 ++++++------------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index ca161dfeee48..b4290f509a39 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -260,6 +260,7 @@ struct iscsi_iser_task; struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; + struct ib_device_attr dev_attr; struct ib_cq *rx_cq[ISER_MAX_CQ]; struct ib_cq *tx_cq[ISER_MAX_CQ]; struct ib_mr *mr; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 9569e402d368..95fcfcac0100 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - int i, j; struct iser_cq_desc *cq_desc; - struct ib_device_attr *dev_attr; + struct ib_device_attr *dev_attr = &device->dev_attr; + int ret, i, j; - dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); - if (!dev_attr) - return -ENOMEM; - - if (ib_query_device(device->ib_device, dev_attr)) { + ret = ib_query_device(device->ib_device, dev_attr); + if (ret) { pr_warn("Query device failed for %s\n", device->ib_device->name); - goto dev_attr_err; + return ret; } /* Assign function handles - based on FMR support */ @@ -101,7 +98,7 @@ static int iser_create_device_ib_res(struct iser_device *device) device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg; } else { iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); - goto dev_attr_err; + return -1; } device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); @@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device) if (ib_register_event_handler(&device->event_handler)) goto handler_err; - kfree(dev_attr); return 0; handler_err: @@ -178,8 +174,6 @@ pd_err: kfree(device->cq_desc); cq_desc_err: iser_err("failed to allocate an IB resource\n"); -dev_attr_err: - kfree(dev_attr); return -1; } -- cgit v1.2.3 From 73bc06b7edd8ce4ccbce7ffd28978ce16b97e5d8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:43 +0200 Subject: IB/iser: Replace fastreg descriptor valid bool with indicators container In T10-PI support we will have memory keys for protection buffers and signature transactions. We prefer to compact indicators rather than keeping multiple bools. This commit does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 8 ++++++-- drivers/infiniband/ulp/iser/iser_memory.c | 4 ++-- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index b4290f509a39..56607140ff3a 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -280,13 +280,17 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +enum iser_reg_indicator { + ISER_DATA_KEY_VALID = 1 << 0, +}; + struct fast_reg_descriptor { struct list_head list; /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; - /* Valid for fast registration flag */ - bool valid; + /* registration indicators container */ + u8 reg_indicators; }; struct iser_conn { diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d25587e2f296..a7a0d3e8f822 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -479,7 +479,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return -EINVAL; } - if (!desc->valid) { + if (!(desc->reg_indicators & ISER_DATA_KEY_VALID)) { memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; @@ -514,7 +514,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, iser_err("fast registration failed, ret:%d\n", ret); return ret; } - desc->valid = false; + desc->reg_indicators &= ~ISER_DATA_KEY_VALID; sge->lkey = desc->data_mr->lkey; sge->addr = desc->data_frpl->page_list[0] + offset; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 95fcfcac0100..6a5f4245182a 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -296,7 +296,7 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, } iser_info("Create fr_desc %p page_list %p\n", desc, desc->data_frpl->page_list); - desc->valid = true; + desc->reg_indicators |= ISER_DATA_KEY_VALID; return 0; -- cgit v1.2.3 From 9a8b08fad2efb3b6c8c5375dbaac5f4e1d19f206 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:44 +0200 Subject: IB/iser: Generalize iser_unmap_task_data and finalize_rdma_unaligned_sg This routines operates on data buffers and may also work with protection infomation buffers. So we generalize them to handle an iser_data_buf which can be the command data or command protection information. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 9 ++++--- drivers/infiniband/ulp/iser/iser_initiator.c | 37 +++++++++++++++++-------- drivers/infiniband/ulp/iser/iser_memory.c | 40 +++++++++++----------------- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 56607140ff3a..623defa187b2 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -410,8 +410,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_free_rx_descriptors(struct iser_conn *ib_conn); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, + struct iser_data_buf *mem, + struct iser_data_buf *mem_copy, + enum iser_data_dir cmd_dir); int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); @@ -441,7 +443,8 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 334f34b1cd46..58e14c7705c6 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -644,27 +644,42 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { struct iser_device *device = iser_task->iser_conn->ib_conn->device; - int is_rdma_aligned = 1; + int is_rdma_data_aligned = 1; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->data[ISER_DIR_IN], + &iser_task->data_copy[ISER_DIR_IN], + ISER_DIR_IN); } + if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->data[ISER_DIR_OUT], + &iser_task->data_copy[ISER_DIR_OUT], + ISER_DIR_OUT); } - if (iser_task->dir[ISER_DIR_IN]) + if (iser_task->dir[ISER_DIR_IN]) { device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_IN]); - if (iser_task->dir[ISER_DIR_OUT]) - device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + } - /* if the data was unaligned, it was already unmapped and then copied */ - if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_task); + if (iser_task->dir[ISER_DIR_OUT]) { + device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_OUT]); + if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->prot[ISER_DIR_OUT]); + } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index a7a0d3e8f822..a9335080ae69 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -105,17 +105,18 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, /** * iser_finalize_rdma_unaligned_sg */ + void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) + struct iser_data_buf *data, + struct iser_data_buf *data_copy, + enum iser_data_dir cmd_dir) { struct ib_device *dev; - struct iser_data_buf *mem_copy; unsigned long cmd_data_len; dev = iser_task->iser_conn->ib_conn->device->ib_device; - mem_copy = &iser_task->data_copy[cmd_dir]; - ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1, + ib_dma_unmap_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -127,10 +128,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, int i; /* copy back read RDMA to unaligned sg */ - mem = mem_copy->copy_buf; + mem = data_copy->copy_buf; - sgl = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf; - sg_size = iser_task->data[ISER_DIR_IN].size; + sgl = (struct scatterlist *)data->buf; + sg_size = data->size; p = mem; for_each_sg(sgl, sg, sg_size, i) { @@ -143,15 +144,15 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, } } - cmd_data_len = iser_task->data[cmd_dir].data_len; + cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) - free_pages((unsigned long)mem_copy->copy_buf, + free_pages((unsigned long)data_copy->copy_buf, ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else - kfree(mem_copy->copy_buf); + kfree(data_copy->copy_buf); - mem_copy->copy_buf = NULL; + data_copy->copy_buf = NULL; } #define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) @@ -329,22 +330,13 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, return 0; } -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data) { struct ib_device *dev; - struct iser_data_buf *data; dev = iser_task->iser_conn->ib_conn->device->ib_device; - - if (iser_task->dir[ISER_DIR_IN]) { - data = &iser_task->data[ISER_DIR_IN]; - ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); - } - - if (iser_task->dir[ISER_DIR_OUT]) { - data = &iser_task->data[ISER_DIR_OUT]; - ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE); - } + ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, @@ -363,7 +355,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task); + iser_dma_unmap_task_data(iser_task, &iser_task->data[cmd_dir]); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ -- cgit v1.2.3 From 5f588e3d0c9483ef2fd35f7fe5e104f236b704f8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:45 +0200 Subject: IB/iser: Generalize fall_to_bounce_buf routine Unaligned SG-lists may also happen for protection information. Generalize bounce buffer routine to handle any iser_data_buf which may be data and/or protection. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 53 +++++++++++++++++-------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index a9335080ae69..2c3f4b144a1a 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -45,13 +45,19 @@ * iser_start_rdma_unaligned_sg */ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data, + struct iser_data_buf *data_copy, enum iser_data_dir cmd_dir) { - int dma_nents; - struct ib_device *dev; + struct ib_device *dev = iser_task->iser_conn->ib_conn->device->ib_device; + struct scatterlist *sgl = (struct scatterlist *)data->buf; + struct scatterlist *sg; char *mem = NULL; - struct iser_data_buf *data = &iser_task->data[cmd_dir]; - unsigned long cmd_data_len = data->data_len; + unsigned long cmd_data_len = 0; + int dma_nents, i; + + for_each_sg(sgl, sg, data->size, i) + cmd_data_len += ib_sg_dma_len(dev, sg); if (cmd_data_len > ISER_KMALLOC_THRESHOLD) mem = (void *)__get_free_pages(GFP_ATOMIC, @@ -61,17 +67,16 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, if (mem == NULL) { iser_err("Failed to allocate mem size %d %d for copying sglist\n", - data->size,(int)cmd_data_len); + data->size, (int)cmd_data_len); return -ENOMEM; } if (cmd_dir == ISER_DIR_OUT) { /* copy the unaligned sg the buffer which is used for RDMA */ - struct scatterlist *sgl = (struct scatterlist *)data->buf; - struct scatterlist *sg; int i; char *p, *from; + sgl = (struct scatterlist *)data->buf; p = mem; for_each_sg(sgl, sg, data->size, i) { from = kmap_atomic(sg_page(sg)); @@ -83,22 +88,19 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, } } - sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len); - iser_task->data_copy[cmd_dir].buf = - &iser_task->data_copy[cmd_dir].sg_single; - iser_task->data_copy[cmd_dir].size = 1; + sg_init_one(&data_copy->sg_single, mem, cmd_data_len); + data_copy->buf = &data_copy->sg_single; + data_copy->size = 1; + data_copy->copy_buf = mem; - iser_task->data_copy[cmd_dir].copy_buf = mem; - - dev = iser_task->iser_conn->ib_conn->device->ib_device; - dma_nents = ib_dma_map_sg(dev, - &iser_task->data_copy[cmd_dir].sg_single, - 1, + dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); BUG_ON(dma_nents == 0); - iser_task->data_copy[cmd_dir].dma_nents = dma_nents; + data_copy->dma_nents = dma_nents; + data_copy->data_len = cmd_data_len; + return 0; } @@ -341,11 +343,12 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, struct ib_device *ibdev, + struct iser_data_buf *mem, + struct iser_data_buf *mem_copy, enum iser_data_dir cmd_dir, int aligned_len) { struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; - struct iser_data_buf *mem = &iser_task->data[cmd_dir]; iscsi_conn->fmr_unalign_cnt++; iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", @@ -355,12 +358,12 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task, &iser_task->data[cmd_dir]); + iser_dma_unmap_task_data(iser_task, mem); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) - return -ENOMEM; + if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0) + return -ENOMEM; return 0; } @@ -388,7 +391,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(iser_task, ibdev, + err = fall_to_bounce_buf(iser_task, ibdev, mem, + &iser_task->data_copy[cmd_dir], cmd_dir, aligned_len); if (err) { iser_err("failed to allocate bounce buffer\n"); @@ -536,7 +540,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(iser_task, ibdev, + err = fall_to_bounce_buf(iser_task, ibdev, mem, + &iser_task->data_copy[cmd_dir], cmd_dir, aligned_len); if (err) { iser_err("failed to allocate bounce buffer\n"); -- cgit v1.2.3 From 7f73384752af3caf0756cf807b2f7fbac50982d1 Mon Sep 17 00:00:00 2001 From: Alex Tabachnik Date: Wed, 5 Mar 2014 19:43:46 +0200 Subject: IB/iser: Introduce pi_enable, pi_guard module parameters Use modparams to activate protection information support. pi_enable bool: Based on this parameter iSER will know if it should support T10-PI. We don't want to do this by default as it requires to allocate and initialize extra resources. In case pi_enable=N, iSER won't publish to SCSI midlayer any DIF capabilities. pi_guard int: Based on this parameter iSER will publish DIX guard type support to SCSI midlayer. 0 means CRC is allowed to be passed in DIX buffers, 1 (or non-zero) means IP-CSUM is allowed to be passed in DIX buffers. Note that over the wire, only CRC is allowed. In the next phase, it is worth considering passing these parameters from iscsid via nlmsg. This will allow these parameters to be connection based rather than global. Signed-off-by: Alex Tabachnik Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 8 ++++++++ drivers/infiniband/ulp/iser/iscsi_iser.h | 3 +++ drivers/infiniband/ulp/iser/iser_verbs.c | 13 +++++++++++++ 3 files changed, 24 insertions(+) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index dd03cfe596d6..cfa952e9ac90 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); int iser_debug_level = 0; +bool iser_pi_enable = false; +int iser_pi_guard = 0; MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); MODULE_LICENSE("Dual BSD/GPL"); @@ -91,6 +93,12 @@ MODULE_VERSION(DRV_VER); module_param_named(debug_level, iser_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); +module_param_named(pi_enable, iser_pi_enable, bool, 0644); +MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); + +module_param_named(pi_guard, iser_pi_guard, int, 0644); +MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)"); + struct iser_global ig; void diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 623defa187b2..011003f04253 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -317,6 +317,7 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; + bool pi_support; /* Connection memory registration pool */ union { @@ -371,6 +372,8 @@ struct iser_global { extern struct iser_global ig; extern int iser_debug_level; +extern bool iser_pi_enable; +extern int iser_pi_guard; /* allocate connection resources needed for rdma functionality */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 6a5f4245182a..4c27f553df39 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -607,6 +607,19 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id) ib_conn = (struct iser_conn *)cma_id->context; ib_conn->device = device; + /* connection T10-PI support */ + if (iser_pi_enable) { + if (!(device->dev_attr.device_cap_flags & + IB_DEVICE_SIGNATURE_HANDOVER)) { + iser_warn("T10-PI requested but not supported on %s, " + "continue without T10-PI\n", + ib_conn->device->ib_device->name); + ib_conn->pi_support = false; + } else { + ib_conn->pi_support = true; + } + } + ret = rdma_resolve_route(cma_id, 1000); if (ret) { iser_err("resolve route failed: %d\n", ret); -- cgit v1.2.3 From 6b5a8fb0d22f95fff1eefe1545aa2c7771cacc3f Mon Sep 17 00:00:00 2001 From: Alex Tabachnik Date: Wed, 5 Mar 2014 19:43:47 +0200 Subject: IB/iser: Initialize T10-PI resources During connection establishment we also initialize T10-PI resources (QP, PI contexts) in order to support SCSI's protection operations. Signed-off-by: Alex Tabachnik Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 21 ++++++++- drivers/infiniband/ulp/iser/iser_verbs.c | 77 +++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 011003f04253..99fc8b899648 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -134,6 +134,15 @@ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) +/* Max registration work requests per command */ +#define ISER_MAX_REG_WR_PER_CMD 5 + +/* For Signature we don't support DATAOUTs so no need to make room for them */ +#define ISER_QP_SIG_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \ + (1 + ISER_MAX_REG_WR_PER_CMD) + \ + ISER_MAX_TX_MISC_PDUS + \ + ISER_MAX_RX_MISC_PDUS) + #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 @@ -281,7 +290,16 @@ struct iser_device { }; enum iser_reg_indicator { - ISER_DATA_KEY_VALID = 1 << 0, + ISER_DATA_KEY_VALID = 1 << 0, + ISER_PROT_KEY_VALID = 1 << 1, + ISER_SIG_KEY_VALID = 1 << 2, + ISER_FASTREG_PROTECTED = 1 << 3, +}; + +struct iser_pi_context { + struct ib_mr *prot_mr; + struct ib_fast_reg_page_list *prot_frpl; + struct ib_mr *sig_mr; }; struct fast_reg_descriptor { @@ -289,6 +307,7 @@ struct fast_reg_descriptor { /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; + struct iser_pi_context *pi_ctx; /* registration indicators container */ u8 reg_indicators; }; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 4c27f553df39..0404c71761f9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -275,7 +275,7 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn) static int iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, - struct fast_reg_descriptor *desc) + bool pi_enable, struct fast_reg_descriptor *desc) { int ret; @@ -294,12 +294,64 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); goto fast_reg_mr_failure; } + desc->reg_indicators |= ISER_DATA_KEY_VALID; + + if (pi_enable) { + struct ib_mr_init_attr mr_init_attr = {0}; + struct iser_pi_context *pi_ctx = NULL; + + desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); + if (!desc->pi_ctx) { + iser_err("Failed to allocate pi context\n"); + ret = -ENOMEM; + goto pi_ctx_alloc_failure; + } + pi_ctx = desc->pi_ctx; + + pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx->prot_frpl)) { + ret = PTR_ERR(pi_ctx->prot_frpl); + iser_err("Failed to allocate prot frpl ret=%d\n", + ret); + goto prot_frpl_failure; + } + + pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(pi_ctx->prot_mr)) { + ret = PTR_ERR(pi_ctx->prot_mr); + iser_err("Failed to allocate prot frmr ret=%d\n", + ret); + goto prot_mr_failure; + } + desc->reg_indicators |= ISER_PROT_KEY_VALID; + + mr_init_attr.max_reg_descriptors = 2; + mr_init_attr.flags |= IB_MR_SIGNATURE_EN; + pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); + if (IS_ERR(pi_ctx->sig_mr)) { + ret = PTR_ERR(pi_ctx->sig_mr); + iser_err("Failed to allocate signature enabled mr err=%d\n", + ret); + goto sig_mr_failure; + } + desc->reg_indicators |= ISER_SIG_KEY_VALID; + } + desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + iser_info("Create fr_desc %p page_list %p\n", desc, desc->data_frpl->page_list); - desc->reg_indicators |= ISER_DATA_KEY_VALID; return 0; - +sig_mr_failure: + ib_dereg_mr(desc->pi_ctx->prot_mr); +prot_mr_failure: + ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); +prot_frpl_failure: + kfree(desc->pi_ctx); +pi_ctx_alloc_failure: + ib_dereg_mr(desc->data_mr); fast_reg_mr_failure: ib_free_fast_reg_page_list(desc->data_frpl); @@ -320,15 +372,15 @@ int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) INIT_LIST_HEAD(&ib_conn->fastreg.pool); ib_conn->fastreg.pool_size = 0; for (i = 0; i < cmds_max; i++) { - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) { iser_err("Failed to allocate a new fast_reg descriptor\n"); ret = -ENOMEM; goto err; } - ret = iser_create_fastreg_desc(device->ib_device, - device->pd, desc); + ret = iser_create_fastreg_desc(device->ib_device, device->pd, + ib_conn->pi_support, desc); if (ret) { iser_err("Failed to create fastreg descriptor err=%d\n", ret); @@ -364,6 +416,12 @@ void iser_free_fastreg_pool(struct iser_conn *ib_conn) list_del(&desc->list); ib_free_fast_reg_page_list(desc->data_frpl); ib_dereg_mr(desc->data_mr); + if (desc->pi_ctx) { + ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); + ib_dereg_mr(desc->pi_ctx->prot_mr); + ib_destroy_mr(desc->pi_ctx->sig_mr); + kfree(desc->pi_ctx); + } kfree(desc); ++i; } @@ -405,12 +463,17 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) init_attr.qp_context = (void *)ib_conn; init_attr.send_cq = device->tx_cq[min_index]; init_attr.recv_cq = device->rx_cq[min_index]; - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; + if (ib_conn->pi_support) { + init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; + init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; + } else { + init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; + } ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); if (ret) -- cgit v1.2.3 From 177e31bd5a40999028f6694623ceea1bec5abff6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:48 +0200 Subject: IB/iser: Support T10-PI operations Add logic to initialize protection information entities. Upon each iSCSI task, we keep the scsi_cmnd in order to query the scsi protection operations and reference to protection buffers. Modify iser_fast_reg_mr to receive indication whether it is registering the data or protection buffers. In addition introduce iser_reg_sig_mr which performs fast registration work-request for a signature enabled memory region (IB_WR_REG_SIG_MR). In this routine we set all the protection relevants for the device to offload protection data-transfer and verification. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 + drivers/infiniband/ulp/iser/iscsi_iser.h | 9 + drivers/infiniband/ulp/iser/iser_initiator.c | 63 ++++++- drivers/infiniband/ulp/iser/iser_memory.c | 257 ++++++++++++++++++++++++--- 4 files changed, 304 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index cfa952e9ac90..a64b87811915 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -184,6 +184,8 @@ iscsi_iser_task_init(struct iscsi_task *task) iser_task->command_sent = 0; iser_task_rdma_init(iser_task); + iser_task->sc = task->sc; + return 0; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 99fc8b899648..fce54092d300 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include #include @@ -289,6 +291,10 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +#define ISER_CHECK_GUARD 0xc0 +#define ISER_CHECK_REFTAG 0x0f +#define ISER_CHECK_APPTAG 0x30 + enum iser_reg_indicator { ISER_DATA_KEY_VALID = 1 << 0, ISER_PROT_KEY_VALID = 1 << 1, @@ -361,11 +367,14 @@ struct iscsi_iser_task { struct iser_tx_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_status status; + struct scsi_cmnd *sc; int command_sent; /* set if command sent */ int dir[ISER_DIRS_NUM]; /* set if dir use*/ struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ + struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */ + struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */ }; struct iser_page_vec { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 58e14c7705c6..7fd95fe6d989 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -62,6 +62,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_task, + pbuf_in, + ISER_DIR_IN, + DMA_FROM_DEVICE); + if (err) + return err; + } + if (edtl > iser_task->data[ISER_DIR_IN].data_len) { iser_err("Total data length: %ld, less than EDTL: " "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", @@ -113,6 +124,17 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_task, + pbuf_out, + ISER_DIR_OUT, + DMA_TO_DEVICE); + if (err) + return err; + } + if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { iser_err("Total data length: %ld, less than EDTL: %d, " "in WRITE cmd BHS itt: %d, conn: 0x%p\n", @@ -368,7 +390,7 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_iser_task *iser_task = task->dd_data; unsigned long edtl; int err; - struct iser_data_buf *data_buf; + struct iser_data_buf *data_buf, *prot_buf; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; @@ -379,18 +401,26 @@ int iser_send_command(struct iscsi_conn *conn, tx_desc->type = ISCSI_TX_SCSI_COMMAND; iser_create_send_desc(iser_conn->ib_conn, tx_desc); - if (hdr->flags & ISCSI_FLAG_CMD_READ) + if (hdr->flags & ISCSI_FLAG_CMD_READ) { data_buf = &iser_task->data[ISER_DIR_IN]; - else + prot_buf = &iser_task->prot[ISER_DIR_IN]; + } else { data_buf = &iser_task->data[ISER_DIR_OUT]; + prot_buf = &iser_task->prot[ISER_DIR_OUT]; + } if (scsi_sg_count(sc)) { /* using a scatter list */ data_buf->buf = scsi_sglist(sc); data_buf->size = scsi_sg_count(sc); } - data_buf->data_len = scsi_bufflen(sc); + if (scsi_prot_sg_count(sc)) { + prot_buf->buf = scsi_prot_sglist(sc); + prot_buf->size = scsi_prot_sg_count(sc); + prot_buf->data_len = sc->prot_sdb->length; + } + if (hdr->flags & ISCSI_FLAG_CMD_READ) { err = iser_prepare_read_cmd(task, edtl); if (err) @@ -635,6 +665,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) iser_task->data[ISER_DIR_IN].data_len = 0; iser_task->data[ISER_DIR_OUT].data_len = 0; + iser_task->prot[ISER_DIR_IN].data_len = 0; + iser_task->prot[ISER_DIR_OUT].data_len = 0; + memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, sizeof(struct iser_regd_buf)); memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, @@ -645,6 +678,8 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { struct iser_device *device = iser_task->iser_conn->ib_conn->device; int is_rdma_data_aligned = 1; + int is_rdma_prot_aligned = 1; + int prot_count = scsi_prot_sg_count(iser_task->sc); /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy @@ -665,12 +700,30 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) ISER_DIR_OUT); } + if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) { + is_rdma_prot_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->prot[ISER_DIR_IN], + &iser_task->prot_copy[ISER_DIR_IN], + ISER_DIR_IN); + } + + if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) { + is_rdma_prot_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->prot[ISER_DIR_OUT], + &iser_task->prot_copy[ISER_DIR_OUT], + ISER_DIR_OUT); + } + if (iser_task->dir[ISER_DIR_IN]) { device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); if (is_rdma_data_aligned) iser_dma_unmap_task_data(iser_task, &iser_task->data[ISER_DIR_IN]); - + if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->prot[ISER_DIR_IN]); } if (iser_task->dir[ISER_DIR_OUT]) { diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2c3f4b144a1a..0995565f5dda 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -440,15 +440,180 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } +static inline enum ib_t10_dif_type +scsi2ib_prot_type(unsigned char prot_type) +{ + switch (prot_type) { + case SCSI_PROT_DIF_TYPE0: + return IB_T10DIF_NONE; + case SCSI_PROT_DIF_TYPE1: + return IB_T10DIF_TYPE1; + case SCSI_PROT_DIF_TYPE2: + return IB_T10DIF_TYPE2; + case SCSI_PROT_DIF_TYPE3: + return IB_T10DIF_TYPE3; + default: + return IB_T10DIF_NONE; + } +} + + +static int +iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) +{ + unsigned char scsi_ptype = scsi_get_prot_type(sc); + + sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; + sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; + sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size; + sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size; + + switch (scsi_get_prot_op(sc)) { + case SCSI_PROT_WRITE_INSERT: + case SCSI_PROT_READ_STRIP: + sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + break; + case SCSI_PROT_READ_INSERT: + case SCSI_PROT_WRITE_STRIP: + sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + break; + case SCSI_PROT_READ_PASS: + case SCSI_PROT_WRITE_PASS: + sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + break; + default: + iser_err("Unsupported PI operation %d\n", + scsi_get_prot_op(sc)); + return -EINVAL; + } + return 0; +} + + +static int +iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) +{ + switch (scsi_get_prot_type(sc)) { + case SCSI_PROT_DIF_TYPE0: + *mask = 0x0; + break; + case SCSI_PROT_DIF_TYPE1: + case SCSI_PROT_DIF_TYPE2: + *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG; + break; + case SCSI_PROT_DIF_TYPE3: + *mask = ISER_CHECK_GUARD; + break; + default: + iser_err("Unsupported protection type %d\n", + scsi_get_prot_type(sc)); + return -EINVAL; + } + + return 0; +} + +static int +iser_reg_sig_mr(struct iscsi_iser_task *iser_task, + struct fast_reg_descriptor *desc, struct ib_sge *data_sge, + struct ib_sge *prot_sge, struct ib_sge *sig_sge) +{ + struct iser_conn *iser_conn = iser_task->iser_conn->ib_conn; + struct iser_pi_context *pi_ctx = desc->pi_ctx; + struct ib_send_wr sig_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + struct ib_sig_attrs sig_attrs; + int ret; + u32 key; + + memset(&sig_attrs, 0, sizeof(sig_attrs)); + ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs); + if (ret) + goto err; + + ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask); + if (ret) + goto err; + + if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.wr_id = ISER_FASTREG_LI_WRID; + inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(pi_ctx->sig_mr, ++key); + } + + memset(&sig_wr, 0, sizeof(sig_wr)); + sig_wr.opcode = IB_WR_REG_SIG_MR; + sig_wr.wr_id = ISER_FASTREG_LI_WRID; + sig_wr.sg_list = data_sge; + sig_wr.num_sge = 1; + sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; + sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; + if (scsi_prot_sg_count(iser_task->sc)) + sig_wr.wr.sig_handover.prot = prot_sge; + sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + + if (!wr) + wr = &sig_wr; + else + wr->next = &sig_wr; + + ret = ib_post_send(iser_conn->qp, wr, &bad_wr); + if (ret) { + iser_err("reg_sig_mr failed, ret:%d\n", ret); + goto err; + } + desc->reg_indicators &= ~ISER_SIG_KEY_VALID; + + sig_sge->lkey = pi_ctx->sig_mr->lkey; + sig_sge->addr = 0; + sig_sge->length = data_sge->length + prot_sge->length; + if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT || + scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) { + sig_sge->length += (data_sge->length / + iser_task->sc->device->sector_size) * 8; + } + + iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n", + sig_sge->addr, sig_sge->length, + sig_sge->lkey); +err: + return ret; +} + static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_regd_buf *regd_buf, struct iser_data_buf *mem, + enum iser_reg_indicator ind, struct ib_sge *sge) { struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; u8 key; @@ -467,35 +632,42 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return 0; } - plen = iser_sg_to_page_vec(mem, device->ib_device, - desc->data_frpl->page_list, + if (ind == ISER_DATA_KEY_VALID) { + mr = desc->data_mr; + frpl = desc->data_frpl; + } else { + mr = desc->pi_ctx->prot_mr; + frpl = desc->pi_ctx->prot_frpl; + } + + plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list, &offset, &size); if (plen * SIZE_4K < size) { iser_err("fast reg page_list too short to hold this SG\n"); return -EINVAL; } - if (!(desc->reg_indicators & ISER_DATA_KEY_VALID)) { + if (!(desc->reg_indicators & ind)) { memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; + inv_wr.ex.invalidate_rkey = mr->rkey; wr = &inv_wr; /* Bump the key */ - key = (u8)(desc->data_mr->rkey & 0x000000FF); - ib_update_fast_reg_key(desc->data_mr, ++key); + key = (u8)(mr->rkey & 0x000000FF); + ib_update_fast_reg_key(mr, ++key); } /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); fastreg_wr.wr_id = ISER_FASTREG_LI_WRID; fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; - fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; + fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset; + fastreg_wr.wr.fast_reg.page_list = frpl; fastreg_wr.wr.fast_reg.page_list_len = plen; fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; fastreg_wr.wr.fast_reg.length = size; - fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; + fastreg_wr.wr.fast_reg.rkey = mr->rkey; fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); @@ -510,10 +682,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, iser_err("fast registration failed, ret:%d\n", ret); return ret; } - desc->reg_indicators &= ~ISER_DATA_KEY_VALID; + desc->reg_indicators &= ~ind; - sge->lkey = desc->data_mr->lkey; - sge->addr = desc->data_frpl->page_list[0] + offset; + sge->lkey = mr->lkey; + sge->addr = frpl->page_list[0] + offset; sge->length = size; return ret; @@ -550,7 +722,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, mem = &iser_task->data_copy[cmd_dir]; } - if (mem->dma_nents != 1) { + if (mem->dma_nents != 1 || + scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { spin_lock_irqsave(&ib_conn->lock, flags); desc = list_first_entry(&ib_conn->fastreg.pool, struct fast_reg_descriptor, list); @@ -559,21 +732,61 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, regd_buf->reg.mem_h = desc; } - err = iser_fast_reg_mr(iser_task, regd_buf, mem, &data_sge); + err = iser_fast_reg_mr(iser_task, regd_buf, mem, + ISER_DATA_KEY_VALID, &data_sge); if (err) goto err_reg; - if (desc) { - regd_buf->reg.rkey = desc->data_mr->rkey; + if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { + struct ib_sge prot_sge, sig_sge; + + memset(&prot_sge, 0, sizeof(prot_sge)); + if (scsi_prot_sg_count(iser_task->sc)) { + mem = &iser_task->prot[cmd_dir]; + aligned_len = iser_data_buf_aligned_len(mem, ibdev); + if (aligned_len != mem->dma_nents) { + err = fall_to_bounce_buf(iser_task, ibdev, mem, + &iser_task->prot_copy[cmd_dir], + cmd_dir, aligned_len); + if (err) { + iser_err("failed to allocate bounce buffer\n"); + return err; + } + mem = &iser_task->prot_copy[cmd_dir]; + } + + err = iser_fast_reg_mr(iser_task, regd_buf, mem, + ISER_PROT_KEY_VALID, &prot_sge); + if (err) + goto err_reg; + } + + err = iser_reg_sig_mr(iser_task, desc, &data_sge, + &prot_sge, &sig_sge); + if (err) { + iser_err("Failed to register signature mr\n"); + return err; + } + desc->reg_indicators |= ISER_FASTREG_PROTECTED; + + regd_buf->reg.lkey = sig_sge.lkey; + regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey; + regd_buf->reg.va = sig_sge.addr; + regd_buf->reg.len = sig_sge.length; regd_buf->reg.is_mr = 1; } else { - regd_buf->reg.rkey = device->mr->rkey; - regd_buf->reg.is_mr = 0; - } + if (desc) { + regd_buf->reg.rkey = desc->data_mr->rkey; + regd_buf->reg.is_mr = 1; + } else { + regd_buf->reg.rkey = device->mr->rkey; + regd_buf->reg.is_mr = 0; + } - regd_buf->reg.lkey = data_sge.lkey; - regd_buf->reg.va = data_sge.addr; - regd_buf->reg.len = data_sge.length; + regd_buf->reg.lkey = data_sge.lkey; + regd_buf->reg.va = data_sge.addr; + regd_buf->reg.len = data_sge.length; + } return 0; err_reg: -- cgit v1.2.3 From 55e51eda4820ec5a1c1fc8693a51029f74eac2b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:49 +0200 Subject: SCSI/libiscsi: Add check_protection callback for transports iSCSI needs to be at least aware that a task involves protection information. In case it does, after the transaction completed libiscsi will ask the transport to check the protection status of the transaction. Unlike transport errors, DIF errors should not prevent successful completion of the transaction from the transport point of view, but should be escelated to scsi mid-layer when constructing the scsi result and sense data. check_protection routine will return the ascq corresponding to the DIF error that occured (or 0 if no error happened). return ascq: - 0x1: GUARD_CHECK_FAILED - 0x2: APPTAG_CHECK_FAILED - 0x3: REFTAG_CHECK_FAILED Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/scsi/libiscsi.c | 32 ++++++++++++++++++++++++++++++++ include/scsi/libiscsi.h | 4 ++++ include/scsi/scsi_transport_iscsi.h | 1 + 3 files changed, 37 insertions(+) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 40462415291e..3c11acf67849 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -395,6 +395,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (rc) return rc; } + + if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) + task->protected = true; + if (sc->sc_data_direction == DMA_TO_DEVICE) { unsigned out_len = scsi_out(sc)->length; struct iscsi_r2t_info *r2t = &task->unsol_r2t; @@ -823,6 +827,33 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, sc->result = (DID_OK << 16) | rhdr->cmd_status; + if (task->protected) { + sector_t sector; + u8 ascq; + + /** + * Transports that didn't implement check_protection + * callback but still published T10-PI support to scsi-mid + * deserve this BUG_ON. + **/ + BUG_ON(!session->tt->check_protection); + + ascq = session->tt->check_protection(task, §or); + if (ascq) { + sc->result = DRIVER_SENSE << 24 | + SAM_STAT_CHECK_CONDITION; + scsi_build_sense_buffer(1, sc->sense_buffer, + ILLEGAL_REQUEST, 0x10, ascq); + sc->sense_buffer[7] = 0xc; /* Additional sense length */ + sc->sense_buffer[8] = 0; /* Information desc type */ + sc->sense_buffer[9] = 0xa; /* Additional desc length */ + sc->sense_buffer[10] = 0x80; /* Validity bit */ + + put_unaligned_be64(sector, &sc->sense_buffer[12]); + goto out; + } + } + if (rhdr->response != ISCSI_STATUS_CMD_COMPLETED) { sc->result = DID_ERROR << 16; goto out; @@ -1567,6 +1598,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, task->have_checked_conn = false; task->last_timeout = jiffies; task->last_xfer = jiffies; + task->protected = false; INIT_LIST_HEAD(&task->running); return task; } diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 309f51336fb9..1457c26dfc58 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -133,6 +133,10 @@ struct iscsi_task { unsigned long last_xfer; unsigned long last_timeout; bool have_checked_conn; + + /* T10 protection information */ + bool protected; + /* state set/tested under session->lock */ int state; atomic_t refcount; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 88640a47216c..2555ee5343fd 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -167,6 +167,7 @@ struct iscsi_transport { struct iscsi_bus_flash_conn *fnode_conn); int (*logout_flashnode_sid) (struct iscsi_cls_session *cls_sess); int (*get_host_stats) (struct Scsi_Host *shost, char *buf, int len); + u8 (*check_protection)(struct iscsi_task *task, sector_t *sector); }; /* -- cgit v1.2.3 From 0a7a08ad6f5f344d592fe63403f48e67395e08bf Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:50 +0200 Subject: IB/iser: Implement check_protection Once the iSCSI transaction is completed we must implement check_protection in order to notify on DIF errors that may have occured. The routine boils down to calling ib_check_mr_status to get the signature status of the transaction. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 13 +++++++++ drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ drivers/infiniband/ulp/iser/iser_verbs.c | 47 ++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index a64b87811915..f13d7e9801f1 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -306,6 +306,18 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) } } +static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +{ + struct iscsi_iser_task *iser_task = task->dd_data; + + if (iser_task->dir[ISER_DIR_IN]) + return iser_check_task_pi_status(iser_task, ISER_DIR_IN, + sector); + else + return iser_check_task_pi_status(iser_task, ISER_DIR_OUT, + sector); +} + static struct iscsi_cls_conn * iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) { @@ -742,6 +754,7 @@ static struct iscsi_transport iscsi_iser_transport = { .xmit_task = iscsi_iser_task_xmit, .cleanup_task = iscsi_iser_cleanup_task, .alloc_pdu = iscsi_iser_pdu_alloc, + .check_protection = iscsi_iser_check_protection, /* recovery */ .session_recovery_timedout = iscsi_session_recovery_timedout, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index fce54092d300..95f291fca178 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -483,4 +483,6 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fastreg_pool(struct iser_conn *ib_conn); +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir, sector_t *sector); #endif diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 0404c71761f9..abbb6ec90d14 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1153,3 +1153,50 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context) tasklet_schedule(&device->cq_tasklet[cq_index]); } + +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir, sector_t *sector) +{ + struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; + struct fast_reg_descriptor *desc = reg->mem_h; + unsigned long sector_size = iser_task->sc->device->sector_size; + struct ib_mr_status mr_status; + int ret; + + if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) { + desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + ret = ib_check_mr_status(desc->pi_ctx->sig_mr, + IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + goto err; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + sector_t sector_off = mr_status.sig_err.sig_err_offset; + + do_div(sector_off, sector_size + 8); + *sector = scsi_get_lba(iser_task->sc) + sector_off; + + pr_err("PI error found type %d at sector %lx " + "expected %x vs actual %x\n", + mr_status.sig_err.err_type, *sector, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + return 0x1; + case IB_SIG_BAD_REFTAG: + return 0x3; + case IB_SIG_BAD_APPTAG: + return 0x2; + } + } + } + + return 0; +err: + /* Not alot we can do here, return ambiguous guard error */ + return 0x1; +} -- cgit v1.2.3 From 6192d4e6bbc7e232093f508b77bd555fd0323369 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 19:43:51 +0200 Subject: IB/iser: Publish T10-PI support to SCSI midlayer After allocating a scsi_host we set protection types and guard type supported. Signed-off-by: Sagi Grimberg Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index f13d7e9801f1..a0ec2d012f95 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -435,6 +435,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) iscsi_host_free(shost); } +static inline unsigned int +iser_dif_prot_caps(int prot_caps) +{ + return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION : 0) | + ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION : 0) | + ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION | + SHOST_DIX_TYPE3_PROTECTION : 0); +} + static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, @@ -459,8 +470,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, * older userspace tools (before 2.0-870) did not pass us * the leading conn's ep so this will be NULL; */ - if (ep) + if (ep) { ib_conn = ep->dd_data; + if (ib_conn->pi_support) { + u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; + + scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); + if (iser_pi_guard) + scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); + else + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + } + } if (iscsi_host_add(shost, ep ? ib_conn->device->ib_device->dma_device : NULL)) -- cgit v1.2.3 From ff1706f4feb8e0e1a2e56a8dd57e17a4b45649b5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Oct 2013 12:14:12 +0300 Subject: RDMA/cxgb4: Fix underflows in c4iw_create_qp() These sizes should be unsigned so we don't allow negative values and have underflow bugs. These can come from the user so there may be security implications, but I have not tested this. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 582936708e6e..72ea152c5f40 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1533,7 +1533,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - int sqsize, rqsize; + unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; -- cgit v1.2.3 From e24a72a3302a638d4c6e77f0b40c45cc61c3f089 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Oct 2013 12:14:35 +0300 Subject: RDMA/cxgb4: Fix four byte info leak in c4iw_create_cq() There is a four byte hole at the end of the "uresp" struct after the ->qid_mask member. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 88de3aa9c5b0..e436ead0b32f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -930,6 +930,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, if (!mm2) goto err4; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; -- cgit v1.2.3 From ffd435924c86de055d33fe59941841819eef9f6a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:38 +0530 Subject: RDMA/cxgb4: Cap CQ size at T4_MAX_IQ_SIZE Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index e436ead0b32f..906119f7a43f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -881,7 +881,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, /* * Make actual HW queue 2x to avoid cdix_inc overflows. */ - hwentries = entries * 2; + hwentries = min(entries * 2, T4_MAX_IQ_SIZE); /* * Make HW queue at least 64 entries so GTS updates aren't too -- cgit v1.2.3 From f8e819081f797df355cffbdedb9301ea50ae76b2 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:39 +0530 Subject: RDMA/cxgb4: Allow loopback connections find_route() must treat loopback as a valid egress interface. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d286bdebe2ab..360807eaffec 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -400,7 +400,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, n = dst_neigh_lookup(&rt->dst, &peer_ip); if (!n) return NULL; - if (!our_interface(dev, n->dev)) { + if (!our_interface(dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { dst_release(&rt->dst); return NULL; } -- cgit v1.2.3 From ebf00060c33b9d0946384fa6f440df7ea35a569e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:40 +0530 Subject: RDMA/cxgb4: Always release neigh entry Always release the neigh entry in rx_pkt(). Based on original work by Santosh Rastapur . Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 360807eaffec..2b2af962cc64 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3347,13 +3347,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) pi = (struct port_info *)netdev_priv(pdev); tx_chan = cxgb4_port_chan(pdev); } + neigh_release(neigh); if (!e) { pr_err("%s - failed to allocate l2t entry!\n", __func__); goto free_dst; } - neigh_release(neigh); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; window = (__force u16) htons((__force u16)tcph->window); -- cgit v1.2.3 From 8a9c399eeee8c2d99e22b975f6023001a1fde88f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:42 +0530 Subject: RDMA/cxgb4: Fix incorrect BUG_ON conditions Based on original work from Jay Hernandez Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 906119f7a43f..d6a7db2c42f6 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -603,7 +603,7 @@ proc_cqe: */ if (SQ_TYPE(hw_cqe)) { int idx = CQE_WRID_SQ_IDX(hw_cqe); - BUG_ON(idx > wq->sq.size); + BUG_ON(idx >= wq->sq.size); /* * Account for any unsignaled completions completed by @@ -617,7 +617,7 @@ proc_cqe: wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; else wq->sq.in_use -= idx - wq->sq.cidx; - BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size); + BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); wq->sq.cidx = (uint16_t)idx; PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); -- cgit v1.2.3 From ba32de9d8d8173a1d6dd1ed608c519d5d0a623bb Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:43 +0530 Subject: RDMA/cxgb4: Mind the sq_sig_all/sq_sig_type QP attributes Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/qp.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 23eaeabab93b..b810d2a89d05 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -441,6 +441,7 @@ struct c4iw_qp { atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; + int sq_sig_all; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 72ea152c5f40..723ad290bd9d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -675,7 +675,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { @@ -736,7 +736,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; - swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; @@ -1605,6 +1606,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); -- cgit v1.2.3 From df2d5130ece9118591c2f3fbf0ee4a79183b4ccc Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:44 +0530 Subject: RDMA/cxgb4: Default peer2peer mode to 1 Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 2b2af962cc64..e1fc5c5445c9 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -98,9 +98,9 @@ int c4iw_debug; module_param(c4iw_debug, int, 0644); MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)"); -static int peer2peer; +static int peer2peer = 1; module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; module_param(p2p_type, int, 0644); -- cgit v1.2.3 From eda6d1d1b7932f90d55583f8f3835dd7d6b32543 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:45 +0530 Subject: RDMA/cxgb4: Save the correct map length for fast_reg_page_lists We cannot save the mapped length using the rdma max_page_list_len field of the ib_fast_reg_page_list struct because the core code uses it. This results in an incorrect unmap of the page list in c4iw_free_fastreg_pbl(). I found this with dma mapping debugging enabled in the kernel. The fix is to save the length in the c4iw_fr_page_list struct. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/mem.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index b810d2a89d05..a1e8f1333b79 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -369,6 +369,7 @@ struct c4iw_fr_page_list { DEFINE_DMA_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; + int pll_len; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 41b11951a30a..22a2e3e4540f 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -903,7 +903,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, dma_unmap_addr_set(c4pl, mapping, dma_addr); c4pl->dma_addr = dma_addr; c4pl->dev = dev; - c4pl->ibpl.max_page_list_len = pll_len; + c4pl->pll_len = pll_len; + + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); return &c4pl->ibpl; } @@ -912,8 +916,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) { struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, - c4pl->ibpl.max_page_list_len, + c4pl->pll_len, c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); kfree(c4pl); } -- cgit v1.2.3 From 49c0e2414b20d868cf006addf14152570aef2605 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Thu, 30 Jan 2014 17:36:00 -0500 Subject: IB/qib: Change SDMA progression mode depending on single- or multi-rail Improve performance by changing the behavour of the driver when all SDMA descriptors are in use, and the processes adding new descriptors are single- or multi-rail. For single-rail processes, the driver will block the call and finish posting all SDMA descriptors onto the hardware queue before returning back to PSM. Repeated kernel calls are slower than blocking. For multi-rail processes, the driver will return to PSM as quick as possible so PSM can feed packets to other rail. If all hardware queues are full, PSM will buffer the remaining SDMA descriptors until notified by interrupt that space is available. This patch builds a red-black tree to track the number rails opened by a particular PID. If the number is more than one, it is a multi-rail PSM process, otherwise, it is a single-rail process. Reviewed-by: Dean Luick Reviewed-by: John A Gregor Reviewed-by: Mitko Haralanov Signed-off-by: CQ Tang Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_user_sdma.c | 136 +++++++++++++++++++++++++++--- 1 file changed, 123 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 165aee2ca8a0..d2806cae234c 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -52,6 +52,17 @@ /* attempt to drain the queue for 5secs */ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 +/* + * track how many times a process open this driver. + */ +static struct rb_root qib_user_sdma_rb_root = RB_ROOT; + +struct qib_user_sdma_rb_node { + struct rb_node node; + int refcount; + pid_t pid; +}; + struct qib_user_sdma_pkt { struct list_head list; /* list element */ @@ -120,15 +131,60 @@ struct qib_user_sdma_queue { /* dma page table */ struct rb_root dma_pages_root; + struct qib_user_sdma_rb_node *sdma_rb_node; + /* protect everything above... */ struct mutex lock; }; +static struct qib_user_sdma_rb_node * +qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) +{ + struct qib_user_sdma_rb_node *sdma_rb_node; + struct rb_node *node = root->rb_node; + + while (node) { + sdma_rb_node = container_of(node, + struct qib_user_sdma_rb_node, node); + if (pid < sdma_rb_node->pid) + node = node->rb_left; + else if (pid > sdma_rb_node->pid) + node = node->rb_right; + else + return sdma_rb_node; + } + return NULL; +} + +static int +qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + struct qib_user_sdma_rb_node *got; + + while (*node) { + got = container_of(*node, struct qib_user_sdma_rb_node, node); + parent = *node; + if (new->pid < got->pid) + node = &((*node)->rb_left); + else if (new->pid > got->pid) + node = &((*node)->rb_right); + else + return 0; + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, root); + return 1; +} + struct qib_user_sdma_queue * qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) { struct qib_user_sdma_queue *pq = kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + struct qib_user_sdma_rb_node *sdma_rb_node; if (!pq) goto done; @@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->num_pending = 0; pq->num_sending = 0; pq->added = 0; + pq->sdma_rb_node = NULL; INIT_LIST_HEAD(&pq->sent); spin_lock_init(&pq->sent_lock); @@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->dma_pages_root = RB_ROOT; + sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, + current->pid); + if (sdma_rb_node) { + sdma_rb_node->refcount++; + } else { + int ret; + sdma_rb_node = kmalloc(sizeof( + struct qib_user_sdma_rb_node), GFP_KERNEL); + if (!sdma_rb_node) + goto err_rb; + + sdma_rb_node->refcount = 1; + sdma_rb_node->pid = current->pid; + + ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, + sdma_rb_node); + BUG_ON(ret == 0); + } + pq->sdma_rb_node = sdma_rb_node; + goto done; +err_rb: + dma_pool_destroy(pq->header_cache); err_slab: kmem_cache_destroy(pq->pkt_slab); err_kfree: @@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) if (!pq) return; - kmem_cache_destroy(pq->pkt_slab); + pq->sdma_rb_node->refcount--; + if (pq->sdma_rb_node->refcount == 0) { + rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); + kfree(pq->sdma_rb_node); + } dma_pool_destroy(pq->header_cache); + kmem_cache_destroy(pq->pkt_slab); kfree(pq); } @@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq, struct list_head *pktlist, int count) { - int ret = 0; unsigned long flags; if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) return -ECOMM; - spin_lock_irqsave(&ppd->sdma_lock, flags); - - if (unlikely(!__qib_sdma_running(ppd))) { - ret = -ECOMM; - goto unlock; + /* non-blocking mode */ + if (pq->sdma_rb_node->refcount > 1) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return 0; } + /* In this case, descriptors from this process are not + * linked to ppd pending queue, interrupt handler + * won't update this process, it is OK to directly + * modify without sdma lock. + */ + + pq->num_pending += count; - list_splice_tail_init(pktlist, &ppd->sdma_userpending); - qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + /* + * Blocking mode for single rail process, we must + * release/regain sdma_lock to give other process + * chance to make progress. This is important for + * performance. + */ + do { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + qib_user_sdma_send_desc(ppd, pktlist); + if (!list_empty(pktlist)) + qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } while (!list_empty(pktlist)); -unlock: - spin_unlock_irqrestore(&ppd->sdma_lock, flags); - return ret; + return 0; } int qib_user_sdma_writev(struct qib_ctxtdata *rcd, @@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, qib_user_sdma_queue_clean(ppd, pq); while (dim) { - int mxp = 8; + int mxp = 1; int ndesc = 0; ret = qib_user_sdma_queue_pkts(dd, ppd, pq, -- cgit v1.2.3 From 186f8ba062f796221d51077342f3ba5202838e9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2014 15:12:31 +0300 Subject: IB/qib: Cleanup qib_register_observer() Returning directly is easier to read than do-nothing gotos. Remove the duplicative check on "olp" and pull the code in one indent level. Signed-off-by: Dan Carpenter Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_diag.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 07f9030a8f10..5dfda4c5cc9c 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -694,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd, const struct diag_observer *op) { struct diag_observer_list_elt *olp; - int ret = -EINVAL; + unsigned long flags; if (!dd || !op) - goto bail; - ret = -ENOMEM; + return -EINVAL; olp = vmalloc(sizeof *olp); if (!olp) { pr_err("vmalloc for observer failed\n"); - goto bail; + return -ENOMEM; } - if (olp) { - unsigned long flags; - spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); - olp->op = op; - olp->next = dd->diag_observer_list; - dd->diag_observer_list = olp; - spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); - ret = 0; - } -bail: - return ret; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + + return 0; } /* Remove all registered observers when device is closed */ -- cgit v1.2.3 From 6751360514b604f2d69d00e64b5de0007e01e36e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2014 13:51:19 +0100 Subject: scsi_transport_srp: Fix two kernel-doc warnings This patch fixes the following two kernel-doc warnings: Warning(drivers/scsi/scsi_transport_srp.c:819): No description found for parameter 'rport' Warning(include/scsi/scsi_transport_srp.h:75): Excess struct/union/enum/typedef member 'deleted' description in 'srp_rport' Signed-off-by: Bart Van Assche Reported-by: Masanari Iida Acked-by: Sebastian Riemer Signed-off-by: Roland Dreier --- drivers/scsi/scsi_transport_srp.c | 1 + include/scsi/scsi_transport_srp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index d47ffc8d3e43..13e898332e45 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -810,6 +810,7 @@ EXPORT_SYMBOL_GPL(srp_remove_host); /** * srp_stop_rport_timers - stop the transport layer recovery timers + * @rport: SRP remote port for which to stop the timers. * * Must be called after srp_remove_host() and scsi_remove_host(). The caller * must hold a reference on the rport (rport->dev) and on the SCSI host diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index b11da5c1331e..cdb05dd1d440 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -41,7 +41,6 @@ enum srp_rport_state { * @mutex: Protects against concurrent rport reconnect / * fast_io_fail / dev_loss_tmo activity. * @state: rport state. - * @deleted: Whether or not srp_rport_del() has already been invoked. * @reconnect_delay: Reconnect delay in seconds. * @failed_reconnects: Number of failed reconnect attempts. * @reconnect_work: Work structure used for scheduling reconnect attempts. -- cgit v1.2.3 From 2088ca66f5e986c06dc1deb4e2f004218b652300 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 14 Mar 2014 13:51:58 +0100 Subject: IB/srp: Check ib_query_gid return value Detected by Coverity. Signed-off-by: Sagi Grimberg Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 529b6bcdca7a..890322637e2e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2649,7 +2649,9 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_free_mem; - ib_query_gid(ibdev, host->port, 0, &target->path.sgid); + ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid); + if (ret) + goto err_free_mem; shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " -- cgit v1.2.3 From e7ffde0164b1a580f66ea3b95081626107931d3c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2014 13:52:21 +0100 Subject: IB/srp: Add more logging Log sgid and dgid when reporting that a login has been rejected or when a host has been added. This makes it easy to figure out which initiator and target ports these messages apply to. Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 890322637e2e..2ec9c05814f6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1804,8 +1804,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, shost_printk(KERN_WARNING, shost, PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); else - shost_printk(KERN_WARNING, shost, - PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); + shost_printk(KERN_WARNING, shost, PFX + "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", + target->path.sgid.raw, + target->orig_dgid, reason); } else shost_printk(KERN_WARNING, shost, " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," @@ -2653,15 +2655,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_free_mem; - shost_printk(KERN_DEBUG, target->scsi_host, PFX - "new target: id_ext %016llx ioc_guid %016llx pkey %04x " - "service_id %016llx dgid %pI6\n", - (unsigned long long) be64_to_cpu(target->id_ext), - (unsigned long long) be64_to_cpu(target->ioc_guid), - be16_to_cpu(target->path.pkey), - (unsigned long long) be64_to_cpu(target->service_id), - target->path.dgid.raw); - ret = srp_create_target_ib(target); if (ret) goto err_free_mem; @@ -2681,6 +2674,14 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_disconnect; + shost_printk(KERN_DEBUG, target->scsi_host, PFX + "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be16_to_cpu(target->path.pkey), + be64_to_cpu(target->service_id), + target->path.sgid.raw, target->path.dgid.raw); + return count; err_disconnect: -- cgit v1.2.3 From 2d7091bcf6f893a9b1a2add357c637055eae4e68 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2014 13:52:45 +0100 Subject: IB/srp: Avoid duplicate connections The connection uniqueness check is performed before a new connection is added to the target list. This patch protects both actions by a mutex such that simultaneous writes from two different threads into the "add_target" variable do not result in duplicate connections. Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 12 +++++++++--- drivers/infiniband/ulp/srp/ib_srp.h | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 2ec9c05814f6..3294f10316a0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2614,6 +2614,8 @@ static ssize_t srp_create_target(struct device *dev, target->tl_retry_count = 7; target->queue_size = SRP_DEFAULT_QUEUE_SIZE; + mutex_lock(&host->add_target_mutex); + ret = srp_parse_options(buf, target); if (ret) goto err; @@ -2682,7 +2684,11 @@ static ssize_t srp_create_target(struct device *dev, be64_to_cpu(target->service_id), target->path.sgid.raw, target->path.dgid.raw); - return count; + ret = count; + +out: + mutex_unlock(&host->add_target_mutex); + return ret; err_disconnect: srp_disconnect_target(target); @@ -2698,8 +2704,7 @@ err_free_mem: err: scsi_host_put(target_host); - - return ret; + goto out; } static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); @@ -2735,6 +2740,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) INIT_LIST_HEAD(&host->target_list); spin_lock_init(&host->target_lock); init_completion(&host->released); + mutex_init(&host->add_target_mutex); host->srp_dev = device; host->port = port; diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 575681063f38..aad27b7b4a46 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -105,6 +105,7 @@ struct srp_host { spinlock_t target_lock; struct completion released; struct list_head list; + struct mutex add_target_mutex; }; struct srp_request { -- cgit v1.2.3 From a702adceec3eeee23d95a81a0663661e7c25ad9c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2014 13:53:10 +0100 Subject: IB/srp: Make writing into the "add_target" sysfs attribute interruptible Avoid that stopping srp_daemon takes unusually long due to a cable pull by making writing into the "add_target" sysfs attribute interruptible. Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3294f10316a0..481c873f735e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -411,6 +411,8 @@ static void srp_path_rec_completion(int status, static int srp_lookup_path(struct srp_target_port *target) { + int ret; + target->path.numb_path = 1; init_completion(&target->done); @@ -431,7 +433,9 @@ static int srp_lookup_path(struct srp_target_port *target) if (target->path_query_id < 0) return target->path_query_id; - wait_for_completion(&target->done); + ret = wait_for_completion_interruptible(&target->done); + if (ret < 0) + return ret; if (target->status < 0) shost_printk(KERN_WARNING, target->scsi_host, @@ -710,7 +714,9 @@ static int srp_connect_target(struct srp_target_port *target) ret = srp_send_req(target); if (ret) return ret; - wait_for_completion(&target->done); + ret = wait_for_completion_interruptible(&target->done); + if (ret < 0) + return ret; /* * The CM event handling code will set status to -- cgit v1.2.3 From ac72d766a8e44d782bd5480fc953ab6025c82e92 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2014 13:53:40 +0100 Subject: IB/srp: Avoid that writing into "add_target" hangs due to a cable pull If a cable is pulled while srp_connect_target() is in progress that can result in that function never to return. That makes the process, e.g. srp_daemon, that invoked this function unkillable. Avoid this by letting srp_connect_target() finish if the event IB_CM_TIMEWAIT_EXIT is received. This patch fixes a hang with the following call trace: [] schedule_timeout+0x215/0x2e0 [] wait_for_common+0x123/0x180 [] wait_for_completion+0x1d/0x20 [] srp_connect_target+0x1dc/0x410 [ib_srp] [] srp_create_target+0xba9/0xe70 [ib_srp] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xe5/0x170 [] vfs_write+0xb8/0x1a0 [] sys_write+0x51/0x90 [] system_call_fastpath+0x16/0x1b Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 481c873f735e..a64e469db116 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1871,6 +1871,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_TIMEWAIT_EXIT: shost_printk(KERN_ERR, target->scsi_host, PFX "connection closed\n"); + comp = 1; target->status = 0; break; -- cgit v1.2.3 From b3fe628da2898cf1387105026dfed551ecc25de5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2014 13:54:11 +0100 Subject: IB/srp: Fix a race condition between failing I/O and I/O completion Avoid that srp_terminate_io() can access req->scmnd after it has been cleared by the I/O completion code. Do this by protecting req->scmnd accesses from srp_terminate_io() via locking Signed-off-by: Bart Van Assche Acked-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a64e469db116..66a908bf3fb9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -783,6 +783,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, * srp_claim_req - Take ownership of the scmnd associated with a request. * @target: SRP target port. * @req: SRP request. + * @sdev: If not NULL, only take ownership for this SCSI device. * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take * ownership of @req->scmnd if it equals @scmnd. * @@ -791,16 +792,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, */ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, struct srp_request *req, + struct scsi_device *sdev, struct scsi_cmnd *scmnd) { unsigned long flags; spin_lock_irqsave(&target->lock, flags); - if (!scmnd) { + if (req->scmnd && + (!sdev || req->scmnd->device == sdev) && + (!scmnd || req->scmnd == scmnd)) { scmnd = req->scmnd; req->scmnd = NULL; - } else if (req->scmnd == scmnd) { - req->scmnd = NULL; } else { scmnd = NULL; } @@ -827,9 +829,10 @@ static void srp_free_req(struct srp_target_port *target, } static void srp_finish_req(struct srp_target_port *target, - struct srp_request *req, int result) + struct srp_request *req, struct scsi_device *sdev, + int result) { - struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); + struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL); if (scmnd) { srp_free_req(target, req, scmnd, 0); @@ -841,11 +844,20 @@ static void srp_finish_req(struct srp_target_port *target, static void srp_terminate_io(struct srp_rport *rport) { struct srp_target_port *target = rport->lld_data; + struct Scsi_Host *shost = target->scsi_host; + struct scsi_device *sdev; int i; + /* + * Invoking srp_terminate_io() while srp_queuecommand() is running + * is not safe. Hence the warning statement below. + */ + shost_for_each_device(sdev, shost) + WARN_ON_ONCE(sdev->request_queue->request_fn_active); + for (i = 0; i < target->req_ring_size; ++i) { struct srp_request *req = &target->req_ring[i]; - srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16); + srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16); } } @@ -882,7 +894,7 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (i = 0; i < target->req_ring_size; ++i) { struct srp_request *req = &target->req_ring[i]; - srp_finish_req(target, req, DID_RESET << 16); + srp_finish_req(target, req, NULL, DID_RESET << 16); } INIT_LIST_HEAD(&target->free_tx); @@ -1290,7 +1302,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) complete(&target->tsk_mgmt_done); } else { req = &target->req_ring[rsp->tag]; - scmnd = srp_claim_req(target, req, NULL); + scmnd = srp_claim_req(target, req, NULL, NULL); if (!scmnd) { shost_printk(KERN_ERR, target->scsi_host, "Null scmnd for RSP w/tag %016llx\n", @@ -2008,7 +2020,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req || !srp_claim_req(target, req, scmnd)) + if (!req || !srp_claim_req(target, req, NULL, scmnd)) return SUCCESS; if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, SRP_TSK_ABORT_TASK) == 0) @@ -2039,8 +2051,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) for (i = 0; i < target->req_ring_size; ++i) { struct srp_request *req = &target->req_ring[i]; - if (req->scmnd && req->scmnd->device == scmnd->device) - srp_finish_req(target, req, DID_RESET << 16); + srp_finish_req(target, req, scmnd->device, DID_RESET << 16); } return SUCCESS; -- cgit v1.2.3 From dbb084cc5f52152f53b5fd22fa76b9bf69904594 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:30 +0530 Subject: RDMA/cxgb4: Don't leak skb in c4iw_uld_rx_handler() Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4a033853312e..982f81586f90 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -897,11 +897,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, } opcode = *(u8 *)rsp; - if (c4iw_handlers[opcode]) + if (c4iw_handlers[opcode]) { c4iw_handlers[opcode](dev, skb); - else + } else { pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); + kfree_skb(skb); + } return 0; nomem: -- cgit v1.2.3 From 1ce1d471acb7ad8e8b8e3a2972de9fbb5f2be79a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:31 +0530 Subject: RDMA/cxgb4: Fix possible memory leak in RX_PKT processing If cxgb4_ofld_send() returns < 0, then send_fw_pass_open_req() must free the request skb and the saved skb with the tcp header. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e1fc5c5445c9..773d010a249c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3204,6 +3204,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, struct sk_buff *req_skb; struct fw_ofld_connection_wr *req; struct cpl_pass_accept_req *cpl = cplhdr(skb); + int ret; req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); @@ -3240,7 +3241,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); - cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + if (ret < 0) { + pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, + ret); + kfree_skb(skb); + kfree_skb(req_skb); + } } /* -- cgit v1.2.3 From 70b9c66053ecadde421658b8ec808c981f2eef11 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:32 +0530 Subject: RDMA/cxgb4: Ignore read reponse type 1 CQEs These are generated by HW in some error cases and need to be silently discarded. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index d6a7db2c42f6..ce468e542428 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -365,8 +365,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { - /* - * drop peer2peer RTR reads. + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) + goto next_cqe; + + /* drop peer2peer RTR reads. */ if (CQE_WRID_STAG(hw_cqe) == 1) goto next_cqe; @@ -511,8 +517,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { - /* - * If this is an unsolicited read response, then the read + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* If this is an unsolicited read response, then the read * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ -- cgit v1.2.3 From be13b2dff8c4e41846477b22cc5c164ea5a6ac2e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:33 +0530 Subject: RDMA/cxgb4: Connect_request_upcall fixes When processing an MPA Start Request, if the listening endpoint is DEAD, then abort the connection. If the IWCM returns an error, then we must abort the connection and release resources. Also abort_connection() should not post a CLOSE event, so clean that up too. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 773d010a249c..6bfef31f6b1e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -968,13 +968,14 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -static void close_complete_upcall(struct c4iw_ep *ep) +static void close_complete_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; + event.status = status; if (ep->com.cm_id) { PDBG("close complete delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); @@ -988,7 +989,6 @@ static void close_complete_upcall(struct c4iw_ep *ep) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - close_complete_upcall(ep); state_set(&ep->com, ABORTING); set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); @@ -1067,9 +1067,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) } } -static void connect_request_upcall(struct c4iw_ep *ep) +static int connect_request_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; + int ret; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); @@ -1094,15 +1095,14 @@ static void connect_request_upcall(struct c4iw_ep *ep) event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } - if (state_read(&ep->parent_ep->com) != DEAD) { - c4iw_get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } + c4iw_get_ep(&ep->com); + ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, + &event); + if (ret) + c4iw_put_ep(&ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; + return ret; } static void established_upcall(struct c4iw_ep *ep) @@ -1401,7 +1401,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) return; PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - stop_ep_timer(ep); mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -1494,9 +1493,17 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.p2p_type); state_set(&ep->com, MPA_REQ_RCVD); + stop_ep_timer(ep); /* drive upcall */ - connect_request_upcall(ep); + mutex_lock(&ep->parent_ep->com.mutex); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + abort_connection(ep, skb, GFP_KERNEL); + } else { + abort_connection(ep, skb, GFP_KERNEL); + } + mutex_unlock(&ep->parent_ep->com.mutex); return; } @@ -2247,7 +2254,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; disconnect = 0; @@ -2426,7 +2433,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; break; @@ -2981,7 +2988,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { fatal = 1; - close_complete_upcall(ep); + close_complete_upcall(ep, -EIO); ep->com.state = DEAD; } switch (ep->com.state) { @@ -3023,7 +3030,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); - close_complete_upcall(ep); + close_complete_upcall(ep, -ECONNRESET); ret = send_abort(ep, NULL, gfp); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); @@ -3435,6 +3442,7 @@ static void process_timeout(struct c4iw_ep *ep) &attrs, 1); } __state_set(&ep->com, ABORTING); + close_complete_upcall(ep, -ETIMEDOUT); break; default: WARN(1, "%s unexpected state ep %p tid %u state %u\n", -- cgit v1.2.3 From 9c88aa003d26e9f1e9ea6e08511768c2ef666654 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:34 +0530 Subject: RDMA/cxgb4: Update snd_seq when sending MPA messages Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6bfef31f6b1e..a1bc41d04620 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -762,6 +762,7 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, start_ep_timer(ep); state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; + ep->snd_seq += mpalen; return; } @@ -841,6 +842,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) t4_set_arp_err_handler(skb, NULL, arp_failure_discard); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -925,6 +927,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; state_set(&ep->com, MPA_REP_SENT); + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -- cgit v1.2.3 From 08e74c4b00c30c232d535ff368554959403d0432 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:26 +0100 Subject: IB/mthca: Return an error on ib_copy_to_udata() failure In case of error when writing to userspace, the function mthca_create_cq() does not set an error code before following its error path. This patch sets the error code to -EFAULT when ib_copy_to_udata() fails. This was caught when using spatch (aka. coccinelle) to rewrite call to ib_copy_{from,to}_udata(). Link: https://www.gitorious.org/opteya/coccib/source/75ebf2c1033c64c1d81df13e4ae44ee99c989eba:ib_copy_udata.cocci Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_provider.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 5b71d43bd89c..42dde06fdb91 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { mthca_free_cq(to_mdev(ibdev), cq); + err = -EFAULT; goto err_free; } -- cgit v1.2.3 From 5bdb0f02add5994b0bc17494f4726925ca5d6ba1 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:25 +0100 Subject: IB/ehca: Returns an error on ib_copy_to_udata() failure In case of error when writing to userspace, function ehca_create_cq() does not set an error code before following its error path. This patch sets the error code to -EFAULT when ib_copy_to_udata() fails. This was caught when using spatch (aka. coccinelle) to rewrite call to ib_copy_{from,to}_udata(). Link: https://www.gitorious.org/opteya/coccib/source/75ebf2c1033c64c1d81df13e4ae44ee99c989eba:ib_copy_udata.cocci Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_cq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 212150c25ea0..8cc837537768 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); goto create_cq_exit4; } } -- cgit v1.2.3 From 2c34e68f426151bc6d16de6a187678f6693c0770 Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Tue, 11 Mar 2014 14:41:47 +0200 Subject: IB/mad: Check and handle potential DMA mapping errors Running with DMA_API_DEBUG enabled and not checking for DMA mapping errors triggers a kernel stack trace with "DMA-API: device driver failed to check map error" message. Add these checks to the MAD module, both to be be more robust and also eliminate these false-positive stack traces. Signed-off-by: Yan Burman Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 4c837e66516b..ab31f136d04b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1022,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_buf.mad, sge[0].length, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) + return -ENOMEM; + mad_send_wr->header_mapping = sge[0].addr; sge[1].addr = ib_dma_map_single(mad_agent->device, ib_get_payload(mad_send_wr), sge[1].length, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + return -ENOMEM; + } mad_send_wr->payload_mapping = sge[1].addr; spin_lock_irqsave(&qp_info->send_queue.lock, flags); @@ -2590,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, sizeof *mad_priv - sizeof mad_priv->header, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, + sg_list.addr))) { + ret = -ENOMEM; + break; + } mad_priv->header.mapping = sg_list.addr; recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; mad_priv->header.mad_list.mad_queue = recv_queue; -- cgit v1.2.3 From 3839d8ac1bac55922f6632e36040fdc0de374bba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Mar 2014 11:21:39 +0300 Subject: mlx4_core: Fix some indenting in mlx4_ib_add() The code was indented too far and also kernel style says we should have curly braces. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e81c5547e647..6eb0d442a805 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2056,8 +2056,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); if (err) ibdev->counters[i] = -1; - } else - ibdev->counters[i] = -1; + } else { + ibdev->counters[i] = -1; + } } mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) -- cgit v1.2.3 From 4661bd798f1b58ee2755bfa04003638345802680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Mar 2014 11:23:07 +0300 Subject: mlx4_core: Make buffer larger to avoid overflow warning My static checker complains that the sprintf() here can overflow. drivers/infiniband/hw/mlx4/main.c:1836 mlx4_ib_alloc_eqs() error: format string overflow. buf_size: 32 length: 69 This seems like a valid complaint. The "dev->pdev->bus->name" string can be 48 characters long. I just made the buffer 80 characters instead of 69 and I changed the sprintf() to snprintf(). Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 6eb0d442a805..20b4d7a2d3d7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1803,7 +1803,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { - char name[32]; + char name[80]; int eq_per_port = 0; int added_eqs = 0; int total_eqs = 0; @@ -1833,8 +1833,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { - sprintf(name, "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", + i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { -- cgit v1.2.3 From 39c978cd1704dc03f6913bd40f858def7fd31185 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 28 Mar 2014 14:03:40 -0700 Subject: IB/iser: Fix sector_t format warning Fix pr_err (printk) format warning: drivers/infiniband/ulp/iser/iser_verbs.c:1181:4: warning: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'sector_t' [-Wformat] Signed-off-by: Randy Dunlap Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index abbb6ec90d14..d2848e4e4274 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1178,9 +1178,10 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, do_div(sector_off, sector_size + 8); *sector = scsi_get_lba(iser_task->sc) + sector_off; - pr_err("PI error found type %d at sector %lx " + pr_err("PI error found type %d at sector %llx " "expected %x vs actual %x\n", - mr_status.sig_err.err_type, *sector, + mr_status.sig_err.err_type, + (unsigned long long)*sector, mr_status.sig_err.expected, mr_status.sig_err.actual); -- cgit v1.2.3 From 1d6c2b736fd251c1f827704b1234d81b28c112d6 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 1 Apr 2014 16:28:38 +0300 Subject: IB/iser: Drain the tx cq once before looping on the rx cq The iser disconnection flow isn't done before all the inflight recv/send buffers posted to the QP are either flushed or normally completed to the CQ that serves this connection. The condition check is done in iser_handle_comp_error(). Currently, it's possible for the send buffer completion that makes the posted send buffers counter reach zero to be polled in the drain tx call, which is after the rx cq is fully drained. Since this completion might be not an error one (for example, it might be a completion of the logout request iSCSI PDU) we will skip iser_handle_comp_error(). So the connection will never terminate from the iscsi stack point of view, and we hang. To resolve this race, do the draining of the tx cq before the loop on the rx cq. Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index d2848e4e4274..89fadd85a776 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1112,8 +1112,12 @@ static void iser_cq_tasklet_fn(unsigned long data) struct iser_rx_desc *desc; unsigned long xfer_len; struct iser_conn *ib_conn; - int completed_tx, completed_rx; - completed_tx = completed_rx = 0; + int completed_tx, completed_rx = 0; + + /* First do tx drain, so in a case where we have rx flushes and a successful + * tx completion we will still go through completion error handling. + */ + completed_tx = iser_drain_tx_cq(device, cq_index); while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; @@ -1141,7 +1145,6 @@ static void iser_cq_tasklet_fn(unsigned long data) * " would not cause interrupts to be missed" */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - completed_tx += iser_drain_tx_cq(device, cq_index); iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); } -- cgit v1.2.3 From 4667f5dfb0c36a86339652aca694736ebc590871 Mon Sep 17 00:00:00 2001 From: Ariel Nahum Date: Tue, 1 Apr 2014 16:28:39 +0300 Subject: IB/iser: Remove struct iscsi_iser_conn The iscsi stack has existing mechanisms to link back and forth between the iscsi connection and the iscsi transport (e.g iser/tcp) connection. This is done through a dd_data pointer field in struct iscsi_conn which can be set to point to the transport connection, etc. The iscsi_iser_conn structure was used to get this linking done in another way, which is uneeded and adds extra complication to the iser code, so we just remove it. Signed-off-by: Ariel Nahum Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 45 ++++++++++------------- drivers/infiniband/ulp/iser/iscsi_iser.h | 10 ++---- drivers/infiniband/ulp/iser/iser_initiator.c | 54 +++++++++++++--------------- drivers/infiniband/ulp/iser/iser_memory.c | 20 +++++------ drivers/infiniband/ulp/iser/iser_verbs.c | 9 +++-- 5 files changed, 59 insertions(+), 79 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index a0ec2d012f95..bfbbb2d1aff7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -146,8 +146,8 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc) { - struct iscsi_iser_conn *iser_conn = task->conn->dd_data; - struct iser_device *device = iser_conn->ib_conn->device; + struct iser_conn *ib_conn = task->conn->dd_data; + struct iser_device *device = ib_conn->device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; @@ -161,7 +161,7 @@ int iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; tx_desc->tx_sg[0].lkey = device->mr->lkey; - iser_task->iser_conn = iser_conn; + iser_task->ib_conn = ib_conn; return 0; } /** @@ -288,10 +288,9 @@ iscsi_iser_task_xmit(struct iscsi_task *task) static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_tx_desc *tx_desc = &iser_task->desc; - - struct iscsi_iser_conn *iser_conn = task->conn->dd_data; - struct iser_device *device = iser_conn->ib_conn->device; + struct iser_tx_desc *tx_desc = &iser_task->desc; + struct iser_conn *ib_conn = task->conn->dd_data; + struct iser_device *device = ib_conn->device; ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -323,9 +322,8 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) { struct iscsi_conn *conn; struct iscsi_cls_conn *cls_conn; - struct iscsi_iser_conn *iser_conn; - cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx); + cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx); if (!cls_conn) return NULL; conn = cls_conn->dd_data; @@ -336,10 +334,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) */ conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN; - iser_conn = conn->dd_data; - conn->dd_data = iser_conn; - iser_conn->iscsi_conn = conn; - return cls_conn; } @@ -347,8 +341,7 @@ static void iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_conn *ib_conn = iser_conn->ib_conn; + struct iser_conn *ib_conn = conn->dd_data; iscsi_conn_teardown(cls_conn); /* @@ -357,7 +350,7 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) * we free it here. */ if (ib_conn) { - ib_conn->iser_conn = NULL; + ib_conn->iscsi_conn = NULL; iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ } } @@ -368,7 +361,6 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, int is_leading) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_iser_conn *iser_conn; struct iscsi_session *session; struct iser_conn *ib_conn; struct iscsi_endpoint *ep; @@ -395,11 +387,11 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ - iser_info("binding iscsi/iser conn %p %p to ib_conn %p\n", - conn, conn->dd_data, ib_conn); - iser_conn = conn->dd_data; - ib_conn->iser_conn = iser_conn; - iser_conn->ib_conn = ib_conn; + iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn); + + conn->dd_data = ib_conn; + ib_conn->iscsi_conn = conn; + iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */ return 0; } @@ -408,8 +400,7 @@ static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_conn *ib_conn = iser_conn->ib_conn; + struct iser_conn *ib_conn = conn->dd_data; /* * Userspace may have goofed up and not bound the connection or @@ -423,7 +414,7 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) */ iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ } - iser_conn->ib_conn = NULL; + conn->dd_data = NULL; } static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) @@ -661,7 +652,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) struct iser_conn *ib_conn; ib_conn = ep->dd_data; - if (ib_conn->iser_conn) + if (ib_conn->iscsi_conn) /* * Must suspend xmit path if the ep is bound to the * iscsi_conn, so we know we are not accessing the ib_conn @@ -669,7 +660,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) * * This may not be bound if the ep poll failed. */ - iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn); + iscsi_suspend_tx(ib_conn->iscsi_conn); iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 95f291fca178..41fa5e9c1bcc 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -214,7 +214,6 @@ struct iser_data_buf { /* fwd declarations */ struct iser_device; struct iser_cq_desc; -struct iscsi_iser_conn; struct iscsi_iser_task; struct iscsi_endpoint; @@ -319,7 +318,7 @@ struct fast_reg_descriptor { }; struct iser_conn { - struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ + struct iscsi_conn *iscsi_conn; struct iscsi_endpoint *ep; enum iser_ib_conn_state state; /* rdma connection state */ atomic_t refcount; @@ -358,14 +357,9 @@ struct iser_conn { }; }; -struct iscsi_iser_conn { - struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ - struct iser_conn *ib_conn; /* iSER IB conn */ -}; - struct iscsi_iser_task { struct iser_tx_desc desc; - struct iscsi_iser_conn *iser_conn; + struct iser_conn *ib_conn; enum iser_task_status status; struct scsi_cmnd *sc; int command_sent; /* set if command sent */ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 7fd95fe6d989..401909006aec 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -49,7 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->ib_conn->device; + struct iser_device *device = iser_task->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -77,7 +77,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, iser_err("Total data length: %ld, less than EDTL: " "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", iser_task->data[ISER_DIR_IN].data_len, edtl, - task->itt, iser_task->iser_conn); + task->itt, iser_task->ib_conn); return -EINVAL; } @@ -110,7 +110,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->ib_conn->device; + struct iser_device *device = iser_task->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -349,7 +349,7 @@ free_login_buf: static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_session *session = conn->session; iser_dbg("req op %x flags %x\n", req->opcode, req->flags); @@ -362,19 +362,18 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) * response) and no posted send buffers left - they must have been * consumed during previous login phases. */ - WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1); - WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); + WARN_ON(ib_conn->post_recv_buf_count != 1); + WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0); if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); return 0; } else iser_info("Normal session, posting batch of RX %d buffers\n", - iser_conn->ib_conn->min_posted_rx); + ib_conn->min_posted_rx); /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn->ib_conn, - iser_conn->ib_conn->min_posted_rx)) + if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx)) return -ENOMEM; return 0; @@ -386,7 +385,7 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; unsigned long edtl; int err; @@ -399,7 +398,7 @@ int iser_send_command(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; - iser_create_send_desc(iser_conn->ib_conn, tx_desc); + iser_create_send_desc(ib_conn, tx_desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { data_buf = &iser_task->data[ISER_DIR_IN]; @@ -438,7 +437,7 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(ib_conn, tx_desc); if (!err) return 0; @@ -454,7 +453,7 @@ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, struct iscsi_data *hdr) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = NULL; struct iser_regd_buf *regd_buf; @@ -503,7 +502,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(ib_conn, tx_desc); if (!err) return 0; @@ -516,19 +515,18 @@ send_data_out_error: int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *mdesc = &iser_task->desc; unsigned long data_seg_len; int err = 0; struct iser_device *device; - struct iser_conn *ib_conn = iser_conn->ib_conn; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; - iser_create_send_desc(iser_conn->ib_conn, mdesc); + iser_create_send_desc(ib_conn, mdesc); - device = iser_conn->ib_conn->device; + device = ib_conn->device; data_seg_len = ntoh24(task->hdr->dlength); @@ -543,14 +541,13 @@ int iser_send_control(struct iscsi_conn *conn, ib_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - memcpy(iser_conn->ib_conn->login_req_buf, task->data, - task->data_count); + memcpy(ib_conn->login_req_buf, task->data, task->data_count); ib_dma_sync_single_for_device(device->ib_device, ib_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - tx_dsg->addr = iser_conn->ib_conn->login_req_dma; + tx_dsg->addr = ib_conn->login_req_dma; tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; @@ -559,7 +556,7 @@ int iser_send_control(struct iscsi_conn *conn, if (task == conn->login_task) { iser_dbg("op %x dsl %lx, posting login rx buffer\n", task->hdr->opcode, data_seg_len); - err = iser_post_recvl(iser_conn->ib_conn); + err = iser_post_recvl(ib_conn); if (err) goto send_control_error; err = iser_post_rx_bufs(conn, task->hdr); @@ -567,7 +564,7 @@ int iser_send_control(struct iscsi_conn *conn, goto send_control_error; } - err = iser_post_send(iser_conn->ib_conn, mdesc); + err = iser_post_send(ib_conn, mdesc); if (!err) return 0; @@ -583,7 +580,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len, struct iser_conn *ib_conn) { - struct iscsi_iser_conn *conn = ib_conn->iser_conn; struct iscsi_hdr *hdr; u64 rx_dma; int rx_buflen, outstanding, count, err; @@ -605,17 +601,17 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); - iscsi_iser_recv(conn->iscsi_conn, hdr, - rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); + iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data, + rx_xfer_len - ISER_HEADERS_LEN); ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + rx_buflen, DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ - conn->ib_conn->post_recv_buf_count--; + ib_conn->post_recv_buf_count--; if (rx_dma == ib_conn->login_resp_dma) return; @@ -676,7 +672,7 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - struct iser_device *device = iser_task->iser_conn->ib_conn->device; + struct iser_device *device = iser_task->ib_conn->device; int is_rdma_data_aligned = 1; int is_rdma_prot_aligned = 1; int prot_count = scsi_prot_sg_count(iser_task->sc); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 0995565f5dda..bc3f70e44c95 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -49,7 +49,7 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct iser_data_buf *data_copy, enum iser_data_dir cmd_dir) { - struct ib_device *dev = iser_task->iser_conn->ib_conn->device->ib_device; + struct ib_device *dev = iser_task->ib_conn->device->ib_device; struct scatterlist *sgl = (struct scatterlist *)data->buf; struct scatterlist *sg; char *mem = NULL; @@ -116,7 +116,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, struct ib_device *dev; unsigned long cmd_data_len; - dev = iser_task->iser_conn->ib_conn->device->ib_device; + dev = iser_task->ib_conn->device->ib_device; ib_dma_unmap_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? @@ -322,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct ib_device *dev; iser_task->dir[iser_dir] = 1; - dev = iser_task->iser_conn->ib_conn->device->ib_device; + dev = iser_task->ib_conn->device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -337,7 +337,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, { struct ib_device *dev; - dev = iser_task->iser_conn->ib_conn->device->ib_device; + dev = iser_task->ib_conn->device->ib_device; ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } @@ -348,7 +348,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, int aligned_len) { - struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; + struct iscsi_conn *iscsi_conn = iser_task->ib_conn->iscsi_conn; iscsi_conn->fmr_unalign_cnt++; iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", @@ -377,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; @@ -533,7 +533,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, struct fast_reg_descriptor *desc, struct ib_sge *data_sge, struct ib_sge *prot_sge, struct ib_sge *sig_sge) { - struct iser_conn *iser_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct iser_pi_context *pi_ctx = desc->pi_ctx; struct ib_send_wr sig_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; @@ -579,7 +579,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, else wr->next = &sig_wr; - ret = ib_post_send(iser_conn->qp, wr, &bad_wr); + ret = ib_post_send(ib_conn->qp, wr, &bad_wr); if (ret) { iser_err("reg_sig_mr failed, ret:%d\n", ret); goto err; @@ -609,7 +609,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_sge *sge) { struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; - struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct ib_mr *mr; @@ -700,7 +700,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 89fadd85a776..b3668a59ca8d 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -745,9 +745,8 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id) * terminated asynchronously from the iSCSI layer's perspective. */ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING)){ - if (ib_conn->iser_conn) - iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); + if (ib_conn->iscsi_conn) + iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); else iser_err("iscsi_iser connection isn't bound\n"); } @@ -951,7 +950,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; - struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct fast_reg_descriptor *desc = reg->mem_h; if (!reg->is_mr) @@ -1061,7 +1060,7 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc, * perspective. */ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING)) - iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, + iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); /* no more non completed posts to the QP, complete the -- cgit v1.2.3 From 4f9208ad3f6625f707210c00c7e7f7a0f688d019 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 1 Apr 2014 16:28:40 +0300 Subject: IB/iser: Print QP information once connection is established Add an iser info print with the local/remote QP information carried out when the connection is established. While here, fix a little leftover from the T10 work and set a debug print to be carried in debug and not info level. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index b3668a59ca8d..b6ff08e85084 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -340,8 +340,8 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, } desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; - iser_info("Create fr_desc %p page_list %p\n", - desc, desc->data_frpl->page_list); + iser_dbg("Create fr_desc %p page_list %p\n", + desc, desc->data_frpl->page_list); return 0; sig_mr_failure: @@ -728,6 +728,11 @@ failure: static void iser_connected_handler(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; + struct ib_qp_attr attr; + struct ib_qp_init_attr init_attr; + + (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); + iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); ib_conn = (struct iser_conn *)cma_id->context; ib_conn->state = ISER_CONN_UP; -- cgit v1.2.3 From 3ee07d27ac10450ebf5769441fd313c722c03e4a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 1 Apr 2014 16:28:41 +0300 Subject: IB/iser: Update Mellanox copyright note Update Mellanox copyrights for 2014 on the iser initiator driver. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- drivers/infiniband/ulp/iser/iser_initiator.c | 2 +- drivers/infiniband/ulp/iser/iser_memory.c | 2 +- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index bfbbb2d1aff7..25f195ef44b0 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -5,7 +5,7 @@ * Copyright (C) 2004 Alex Aizman * Copyright (C) 2005 Mike Christie * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * maintained by openib-general@openib.org * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 41fa5e9c1bcc..a4e4876cf260 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -8,7 +8,7 @@ * * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 401909006aec..2e2d903db838 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index bc3f70e44c95..47acd3ad3a17 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index b6ff08e85084..32849f2becde 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU -- cgit v1.2.3 From 5de2ad986d7e3626e37a024d7ad69dfd6ab13306 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 1 Apr 2014 16:28:42 +0300 Subject: IB/iser: Bump driver version to 1.3 Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a4e4876cf260..324129f80d40 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -69,7 +69,7 @@ #define DRV_NAME "iser" #define PFX DRV_NAME ": " -#define DRV_VER "1.1" +#define DRV_VER "1.3" #define iser_dbg(fmt, arg...) \ do { \ -- cgit v1.2.3 From 446bf432a9b084d9f3471eca309cc53fa434ccc7 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 13:26:42 -0400 Subject: IB/qib: Remove ib_sg_dma_address() and ib_sg_dma_len() overloads Remove the overload for .dma_len and .dma_address The removal of these methods is compensated for by code changes to .map_sg to insure that the vanilla sg_dma_address() and sg_dma_len() will do the same thing as the equivalent former ib_sg_dma_address() and ib_sg_dma_len() calls into the drivers. Suggested-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Tested-by: Vinod Kumar Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_dma.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c index 2920bb39a65b..59fe092b4b0f 100644 --- a/drivers/infiniband/hw/qib/qib_dma.c +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int qib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = { .unmap_page = qib_dma_unmap_page, .map_sg = qib_map_sg, .unmap_sg = qib_unmap_sg, - .dma_address = qib_sg_dma_address, - .dma_len = qib_sg_dma_len, .sync_single_for_cpu = qib_sync_single_for_cpu, .sync_single_for_device = qib_sync_single_for_device, .alloc_coherent = qib_dma_alloc_coherent, -- cgit v1.2.3 From 49c5c27e05c915f0da4c0e756da313cf09ae2c55 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 15:04:43 -0400 Subject: IB/ipath: Remove ib_sg_dma_address() and ib_sg_dma_len() overloads The removal of these methods is compensated for by code changes to .map_sg to insure that the vanilla sg_dma_address() and sg_dma_len() will do the same thing as the equivalent former ib_sg_dma_address() and ib_sg_dma_len() calls into the drivers. The introduction of this patch required that the struct ipath_dma_mapping_ops be converted to a C99 initializer. Suggested-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_dma.c | 43 ++++++++++++--------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c index 644c2c74e054..123a8c053539 100644 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -115,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -126,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int ipath_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void ipath_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, @@ -176,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, } struct ib_dma_mapping_ops ipath_dma_mapping_ops = { - ipath_mapping_error, - ipath_dma_map_single, - ipath_dma_unmap_single, - ipath_dma_map_page, - ipath_dma_unmap_page, - ipath_map_sg, - ipath_unmap_sg, - ipath_sg_dma_address, - ipath_sg_dma_len, - ipath_sync_single_for_cpu, - ipath_sync_single_for_device, - ipath_dma_alloc_coherent, - ipath_dma_free_coherent + .mapping_error = ipath_mapping_error, + .map_single = ipath_dma_map_single, + .unmap_single = ipath_dma_unmap_single, + .map_page = ipath_dma_map_page, + .unmap_page = ipath_dma_unmap_page, + .map_sg = ipath_map_sg, + .unmap_sg = ipath_unmap_sg, + .sync_single_for_cpu = ipath_sync_single_for_cpu, + .sync_single_for_device = ipath_sync_single_for_device, + .alloc_coherent = ipath_dma_alloc_coherent, + .free_coherent = ipath_dma_free_coherent }; -- cgit v1.2.3 From f3585a6ae31cb0f0ebda53b161fbed7c9a679572 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 13:26:53 -0400 Subject: IB/ehca: Remove ib_sg_dma_address() and ib_sg_dma_len() overloads These methods appear to only mimic the sg_dma_address() and sg_dma_len() behavior. They can be safely removed. Suggested-by: Bart Van Assche Cc: Bart Van Assche Cc: Hoang-Nam Nguyen Cc: Christoph Raisch Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_mrmw.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index bcfb0c183620..65873eeca9b0 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -2591,16 +2591,6 @@ static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, /* This is only a stub; nothing to be done here */ } -static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - return sg->dma_address; -} - -static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg) -{ - return sg->length; -} - static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) @@ -2653,8 +2643,6 @@ struct ib_dma_mapping_ops ehca_dma_mapping_ops = { .unmap_page = ehca_dma_unmap_page, .map_sg = ehca_dma_map_sg, .unmap_sg = ehca_dma_unmap_sg, - .dma_address = ehca_dma_address, - .dma_len = ehca_dma_len, .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, .sync_single_for_device = ehca_dma_sync_single_for_device, .alloc_coherent = ehca_dma_alloc_coherent, -- cgit v1.2.3 From ea58a595657db88f55b5159442fdf0e34e1b4d95 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 13:26:59 -0400 Subject: IB/core: Remove overload in ib_sg_dma* The code is replaced by driver specific changes and avoids the pointer NULL test for drivers that don't overload these operations. Suggested-by: Reviewed-by: Dennis Dalessandro Tested-by: Vinod Kumar Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6793f32ccb58..57777167dea7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1266,10 +1266,6 @@ struct ib_dma_mapping_ops { void (*unmap_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); - u64 (*dma_address)(struct ib_device *dev, - struct scatterlist *sg); - unsigned int (*dma_len)(struct ib_device *dev, - struct scatterlist *sg); void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, @@ -2089,12 +2085,13 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, * ib_sg_dma_address - Return the DMA address from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry + * + * Note: this function is obsolete. To do: change all occurrences of + * ib_sg_dma_address() into sg_dma_address(). */ static inline u64 ib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) { - if (dev->dma_ops) - return dev->dma_ops->dma_address(dev, sg); return sg_dma_address(sg); } @@ -2102,12 +2099,13 @@ static inline u64 ib_sg_dma_address(struct ib_device *dev, * ib_sg_dma_len - Return the DMA length from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry + * + * Note: this function is obsolete. To do: change all occurrences of + * ib_sg_dma_len() into sg_dma_len(). */ static inline unsigned int ib_sg_dma_len(struct ib_device *dev, struct scatterlist *sg) { - if (dev->dma_ops) - return dev->dma_ops->dma_len(dev, sg); return sg_dma_len(sg); } -- cgit v1.2.3 From b2853fd6c2d0f383dbdf7427e263eb576a633867 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 27 Mar 2014 10:52:58 +0200 Subject: IB/core: Don't resolve passive side RoCE L2 address in CMA REQ handler The code that resolves the passive side source MAC within the rdma_cm connection request handler was both redundant and buggy, so remove it. It was redundant since later, when an RC QP is modified to RTR state, the resolution will take place in the ib_core module. It was buggy because this callback also deals with UD SIDR exchange, for which we incorrectly looked at the REQ member of the CM event and dereferenced a random value. Fixes: dd5f03beb4f7 ("IB/core: Ethernet L2 attributes in verbs/cm structures") Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 17 ----------------- drivers/infiniband/core/cma.c | 26 -------------------------- include/rdma/ib_cm.h | 1 - 3 files changed, 44 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0601b9daf840..c3239170d8b7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -349,23 +349,6 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac) -{ - struct cm_id_private *cm_id_priv; - - cm_id_priv = container_of(id, struct cm_id_private, id); - - if (smac != NULL) - memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac)); - - if (alt_smac != NULL) - memcpy(cm_id_priv->alt_av.smac, alt_smac, - sizeof(cm_id_priv->alt_av.smac)); - - return 0; -} -EXPORT_SYMBOL(ib_update_cm_av); - static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) { struct cm_device *cm_dev; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 199958d9ddc8..42c3058e6e9c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1284,15 +1284,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int offset, ret; - u8 smac[ETH_ALEN]; - u8 alt_smac[ETH_ALEN]; - u8 *psmac = smac; - u8 *palt_smac = alt_smac; - int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) == - RDMA_TRANSPORT_IB) && - (rdma_port_get_link_layer(cm_id->device, - ib_event->param.req_rcvd.port) == - IB_LINK_LAYER_ETHERNET)); listen_id = cm_id->context; if (!cma_check_req_qp_type(&listen_id->id, ib_event)) @@ -1336,28 +1327,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) goto err3; - - if (is_iboe) { - if (ib_event->param.req_rcvd.primary_path != NULL) - rdma_addr_find_smac_by_sgid( - &ib_event->param.req_rcvd.primary_path->sgid, - psmac, NULL); - else - psmac = NULL; - if (ib_event->param.req_rcvd.alternate_path != NULL) - rdma_addr_find_smac_by_sgid( - &ib_event->param.req_rcvd.alternate_path->sgid, - palt_smac, NULL); - else - palt_smac = NULL; - } /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(&lock); - if (is_iboe) - ib_update_cm_av(cm_id, psmac, palt_smac); if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index f29e3a27c2cc..0e3ff30647d5 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -601,5 +601,4 @@ struct ib_cm_sidr_rep_param { int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param); -int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac); #endif /* IB_CM_H */ -- cgit v1.2.3 From a7db89eb89cd6a444b16fdd602e818eed34d8222 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:35 +0530 Subject: RDMA/cxgb4: Lock around accept/reject downcalls There is a race between ULP threads doing an accept/reject, and the ingress processing thread handling close/abort for the same connection. The accept/reject path needs to hold the lock to serialize these paths. Signed-off-by: Steve Wise [ Fold in locking fix found by Dan Carpenter . - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a1bc41d04620..6836d114d75a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -760,7 +760,7 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, ep->mpa_skb = skb; c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); + __state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; ep->snd_seq += mpalen; return; @@ -926,7 +926,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) skb_get(skb); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); + __state_set(&ep->com, MPA_REP_SENT); ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -944,6 +944,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + mutex_lock(&ep->com.mutex); dst_confirm(ep->dst); /* setup the hwtid for this connection */ @@ -967,7 +968,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) send_mpa_req(ep, skb, 1); else send_mpa_req(ep, skb, mpa_rev); - + mutex_unlock(&ep->com.mutex); return 0; } @@ -2511,22 +2512,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err; + int err = 0; + int disconnect = 0; struct c4iw_ep *ep = to_ep(cm_id); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); else { err = send_mpa_reject(ep, pdata, pdata_len); - err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + disconnect = 1; } + mutex_unlock(&ep->com.mutex); + if (disconnect) + err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -2541,12 +2548,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { err = -ECONNRESET; goto err; } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); @@ -2615,14 +2624,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (err) goto err1; - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); established_upcall(ep); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return 0; err1: ep->com.cm_id = NULL; cm_id->rem_ref(cm_id); err: + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return err; } -- cgit v1.2.3 From 977116c69862a6062f302395cb3546544d7e1bc1 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:36 +0530 Subject: RDMA/cxgb4: Drop RX_DATA packets if the endpoint is gone Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6836d114d75a..8a645d872483 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1521,6 +1521,8 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) __u8 status = hdr->status; ep = lookup_tid(t, tid); + if (!ep) + return 0; PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); -- cgit v1.2.3 From c529fb50463992982c246155e095577aa0485f57 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:37 +0530 Subject: RDMA/cxgb4: rx_data() needs to hold the ep mutex To avoid racing with other threads doing close/flush/whatever, rx_data() should hold the endpoint mutex. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8a645d872483..26046c23334c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1170,7 +1170,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * the connection. */ stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) + if (ep->com.state != MPA_REQ_SENT) return; /* @@ -1245,7 +1245,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * start reply message including private data. And * the MPA header is valid. */ - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -1360,7 +1360,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) } goto out; err: - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); @@ -1375,7 +1375,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) != MPA_REQ_WAIT) + if (ep->com.state != MPA_REQ_WAIT) return; /* @@ -1496,7 +1496,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); - state_set(&ep->com, MPA_REQ_RCVD); + __state_set(&ep->com, MPA_REQ_RCVD); stop_ep_timer(ep); /* drive upcall */ @@ -1526,11 +1526,12 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); + mutex_lock(&ep->com.mutex); /* update RX credits */ update_rx_credits(ep, dlen); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: ep->rcv_seq += dlen; process_mpa_reply(ep, skb); @@ -1546,7 +1547,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) pr_err("%s Unexpected streaming data." \ " qpid %u ep %p state %d tid %u status %d\n", __func__, ep->com.qp->wq.sq.qid, ep, - state_read(&ep->com), ep->hwtid, status); + ep->com.state, ep->hwtid, status); attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); @@ -1555,6 +1556,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) default: break; } + mutex_unlock(&ep->com.mutex); return 0; } -- cgit v1.2.3 From 96bb2706c883a21d7da6a261d7e60d3bab4cf6bd Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 27 Mar 2014 12:03:47 -0500 Subject: RDMA/cxgb4: Disable DSGL use by default Current hardware doesn't correctly support DSGL. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 22a2e3e4540f..2630838ab1c2 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -37,9 +37,9 @@ #include "iw_cxgb4.h" -int use_dsgl = 1; +int use_dsgl = 0; module_param(use_dsgl, int, 0644); -MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)"); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)"); #define T4_ULPTX_MIN_IO 32 #define C4IW_MAX_INLINE_SIZE 96 -- cgit v1.2.3 From ea61762679cd4d409dcaa6f502f190f4c8156d09 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:54 +0530 Subject: RDMA/ocrdma: EQ full catastrophe avoidance Stale entries in the CQ being destroyed causes hardware to generate EQEs indefinitely for a given CQ. Thus causing uncontrolled execution of irq_handler. This patch fixes this using following sementics: * irq_handler will ring EQ doorbell atleast once and implement budgeting scheme. * cq_destroy will count number of valid entires during destroy and ring cq-db so that hardware does not generate uncontrolled EQE. * cq_destroy will synchronize with last running irq_handler instance. * arm_cq will always defer arming CQ till poll_cq, except for the first arm_cq call. * poll_cq will always ring cq-db with arm=SET if arm_cq was called prior to enter poll_cq. * poll_cq will always ring cq-db with arm=UNSET if arm_cq was not called prior to enter poll_cq. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 18 ++++++- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 53 +++++++++---------- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 81 ++++++++++++++++++++++------- 4 files changed, 103 insertions(+), 50 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 7c001b97b23f..61f508e74823 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -209,8 +209,8 @@ struct ocrdma_cq { */ u32 max_hw_cqe; bool phase_change; - bool armed, solicited; - bool arm_needed; + bool deferred_arm, deferred_sol; + bool first_arm; spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization * to cq polling @@ -223,6 +223,7 @@ struct ocrdma_cq { struct ocrdma_ucontext *ucontext; dma_addr_t pa; u32 len; + u32 cqe_cnt; /* head of all qp's sq and rq for which cqes need to be flushed * by the software. @@ -436,4 +437,17 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, return 0; } +static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, + int eqid) +{ + int indx; + + for (indx = 0; indx < dev->eq_cnt; indx++) { + if (dev->eq_tbl[indx].q.id == eqid) + return indx; + } + + return -EINVAL; +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ac3fbf2db00a..e3c75e01feb9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -444,7 +444,7 @@ mbx_err: return status; } -static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) { int irq; @@ -574,6 +574,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev) if (status) goto alloc_err; + dev->eq_tbl[0].cq_cnt++; status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q); if (status) goto mbx_cq_free; @@ -858,16 +859,8 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx) BUG(); cq = dev->cq_tbl[cq_idx]; - if (cq == NULL) { - pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); + if (cq == NULL) return; - } - spin_lock_irqsave(&cq->cq_lock, flags); - cq->armed = false; - cq->solicited = false; - spin_unlock_irqrestore(&cq->cq_lock, flags); - - ocrdma_ring_cq_db(dev, cq->id, false, false, 0); if (cq->ibcq.comp_handler) { spin_lock_irqsave(&cq->comp_handler_lock, flags); @@ -892,27 +885,35 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle) struct ocrdma_dev *dev = eq->dev; struct ocrdma_eqe eqe; struct ocrdma_eqe *ptr; - u16 eqe_popped = 0; u16 cq_id; - while (1) { + int budget = eq->cq_cnt; + + do { ptr = ocrdma_get_eqe(eq); eqe = *ptr; ocrdma_le32_to_cpu(&eqe, sizeof(eqe)); if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0) break; - eqe_popped += 1; + ptr->id_valid = 0; + /* ring eq doorbell as soon as its consumed. */ + ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1); /* check whether its CQE or not. */ if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) { cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT; ocrdma_cq_handler(dev, cq_id); } ocrdma_eq_inc_tail(eq); - } - ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped); - /* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */ - if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) - ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); + + /* There can be a stale EQE after the last bound CQ is + * destroyed. EQE valid and budget == 0 implies this. + */ + if (budget) + budget--; + + } while (budget); + + ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); return IRQ_HANDLED; } @@ -1357,12 +1358,10 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id) int i; mutex_lock(&dev->dev_lock); - for (i = 0; i < dev->eq_cnt; i++) { - if (dev->eq_tbl[i].q.id != eq_id) - continue; - dev->eq_tbl[i].cq_cnt -= 1; - break; - } + i = ocrdma_get_eq_table_index(dev, eq_id); + if (i == -EINVAL) + BUG(); + dev->eq_tbl[i].cq_cnt -= 1; mutex_unlock(&dev->dev_lock); } @@ -1417,6 +1416,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cq->eqn = ocrdma_bind_eq(dev); cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3; cqe_count = cq->len / cqe_size; + cq->cqe_cnt = cqe_count; if (cqe_count > 1024) { /* Set cnt to 3 to indicate more than 1024 cq entries */ cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT); @@ -1484,12 +1484,9 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq) (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) & OCRDMA_DESTROY_CQ_QID_MASK; - ocrdma_unbind_eq(dev, cq->eqn); status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; + ocrdma_unbind_eq(dev, cq->eqn); dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa); -mbx_err: kfree(cmd); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index db3d55f368bf..77da536d5d96 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -131,5 +131,6 @@ int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state, bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); void ocrdma_flush_qp(struct ocrdma_qp *); +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index f1108ebae83a..7d59ed340991 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -910,6 +910,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, spin_lock_init(&cq->comp_handler_lock); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); + cq->first_arm = true; if (ib_ctx) { uctx = get_ocrdma_ucontext(ib_ctx); @@ -927,9 +928,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, goto ctx_err; } cq->phase = OCRDMA_CQE_VALID; - cq->arm_needed = true; dev->cq_tbl[cq->id] = cq; - return &cq->ibcq; ctx_err: @@ -952,15 +951,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt, return status; } +static void ocrdma_flush_cq(struct ocrdma_cq *cq) +{ + int cqe_cnt; + int valid_count = 0; + unsigned long flags; + + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); + struct ocrdma_cqe *cqe = NULL; + + cqe = cq->va; + cqe_cnt = cq->cqe_cnt; + + /* Last irq might have scheduled a polling thread + * sync-up with it before hard flushing. + */ + spin_lock_irqsave(&cq->cq_lock, flags); + while (cqe_cnt) { + if (is_cqe_valid(cq, cqe)) + valid_count++; + cqe++; + cqe_cnt--; + } + ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count); + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + int ocrdma_destroy_cq(struct ib_cq *ibcq) { int status; struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_eq *eq = NULL; struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); int pdid = 0; + u32 irq, indx; - status = ocrdma_mbx_destroy_cq(dev, cq); + dev->cq_tbl[cq->id] = NULL; + indx = ocrdma_get_eq_table_index(dev, cq->eqn); + if (indx == -EINVAL) + BUG(); + eq = &dev->eq_tbl[indx]; + irq = ocrdma_get_irq(dev, eq); + synchronize_irq(irq); + ocrdma_flush_cq(cq); + + status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { pdid = cq->ucontext->cntxt_pd->id; ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, @@ -969,7 +1005,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } - dev->cq_tbl[cq->id] = NULL; kfree(cq); return status; @@ -2705,10 +2740,18 @@ expand_cqe: } stop_cqe: cq->getp = cur_getp; - if (polled_hw_cqes || expand || stop) { - ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited, + if (cq->deferred_arm) { + ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, + polled_hw_cqes); + cq->deferred_arm = false; + cq->deferred_sol = false; + } else { + /* We need to pop the CQE. No need to arm */ + ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, polled_hw_cqes); + cq->deferred_sol = false; } + return i; } @@ -2780,30 +2823,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); u16 cq_id; - u16 cur_getp; - struct ocrdma_cqe *cqe; unsigned long flags; + bool arm_needed = false, sol_needed = false; cq_id = cq->id; spin_lock_irqsave(&cq->cq_lock, flags); if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED) - cq->armed = true; + arm_needed = true; if (cq_flags & IB_CQ_SOLICITED) - cq->solicited = true; - - cur_getp = cq->getp; - cqe = cq->va + cur_getp; + sol_needed = true; - /* check whether any valid cqe exist or not, if not then safe to - * arm. If cqe is not yet consumed, then let it get consumed and then - * we arm it to avoid false interrupts. - */ - if (!is_cqe_valid(cq, cqe) || cq->arm_needed) { - cq->arm_needed = false; - ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0); + if (cq->first_arm) { + ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); + cq->first_arm = false; + goto skip_defer; } + cq->deferred_arm = true; + +skip_defer: + cq->deferred_sol = sol_needed; spin_unlock_irqrestore(&cq->cq_lock, flags); + return 0; } -- cgit v1.2.3 From 2df84fa87f4d00299031b1335748c54176edfe87 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:55 +0530 Subject: RDMA/ocrdma: SQ and RQ doorbell offset clean up Introducing new macros to define SQ and RQ doorbell offset. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 7 ------- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 5 ++++- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 23 +++++++---------------- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 61f508e74823..1b51e67a26d7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -386,13 +386,6 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct ocrdma_srq, ibsrq); } - -static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp) -{ - return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY && - qp->id < 128) ? 24 : 16); -} - static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) { int cqe_valid; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 60d5ac23ea80..e71685a7ffae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -103,7 +103,10 @@ enum { OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET, OCRDMA_DB_CQ_OFFSET = 0x120, OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET, - OCRDMA_DB_MQ_OFFSET = 0x140 + OCRDMA_DB_MQ_OFFSET = 0x140, + + OCRDMA_DB_SQ_SHIFT = 16, + OCRDMA_DB_RQ_SHIFT = 24 }; #define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7d59ed340991..be75d2b1ea9f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1127,15 +1127,9 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, } uresp.db_page_addr = usr_db; uresp.db_page_size = dev->nic_info.db_page_size; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; - uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; - uresp.db_shift = 24; - } else { - uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET; - uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; - uresp.db_shift = 16; - } + uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; + uresp.db_shift = OCRDMA_DB_RQ_SHIFT; if (qp->dpp_enabled) { uresp.dpp_credit = dpp_credit_lmt; @@ -1308,7 +1302,7 @@ static void ocrdma_flush_rq_db(struct ocrdma_qp *qp) { if (qp->db_cache) { u32 val = qp->rq.dbid | (qp->db_cache << - ocrdma_get_num_posted_shift(qp)); + OCRDMA_DB_RQ_SHIFT); iowrite32(val, qp->rq_db); qp->db_cache = 0; } @@ -2052,7 +2046,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) { - u32 val = qp->sq.dbid | (1 << 16); + u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT); iowrite32(val, qp->sq_db); } @@ -2157,12 +2151,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) { - u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp)); + u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT); - if (qp->state != OCRDMA_QPS_INIT) - iowrite32(val, qp->rq_db); - else - qp->db_cache++; + iowrite32(val, qp->rq_db); } static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, -- cgit v1.2.3 From 21c3391a9adfaddd00481a1d03bf30fc1304e292 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:56 +0530 Subject: RDMA/ocrdma: Read ASIC_ID register to select asic_gen ocrdma driver selects execution path based on sli_family and asic generation number. This introduces code to read the asic gen number from pci register instead of obtaining it from the Emulex NIC driver. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 13 +++++++++++++ drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 6 +++--- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 17 +++++++++++++++-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 6 +++--- 5 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 1b51e67a26d7..24fe248fb35a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -197,6 +197,7 @@ struct ocrdma_dev { int id; struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG]; u16 pvid; + u32 asic_id; }; struct ocrdma_cq { @@ -443,4 +444,16 @@ static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, return -EINVAL; } +static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev) +{ + if (dev->nic_info.dev_family == 0xF && !dev->asic_id) { + pci_read_config_dword( + dev->nic_info.pdev, + OCRDMA_SLI_ASIC_ID_OFFSET, &dev->asic_id); + } + + return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >> + OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e3c75e01feb9..ec310d258553 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1037,7 +1037,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_inline_data = attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) + sizeof(struct ocrdma_sge)); - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { attr->ird = 1; attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE; attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES; @@ -1379,7 +1379,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, __func__, dev->id, dev->attr.max_cqe, entries); return -EINVAL; } - if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)) + if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R)) return -EINVAL; if (dpp_cq) { @@ -1439,7 +1439,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, } /* shared eq between all the consumer cqs. */ cmd->cmd.eqn = cq->eqn; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { if (dpp_cq) cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << OCRDMA_CREATE_CQ_TYPE_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 1a8a945efa60..b9e147816703 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -286,7 +286,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.process_mad = ocrdma_process_mad; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= OCRDMA_UVERBS(CREATE_SRQ) | OCRDMA_UVERBS(MODIFY_SRQ) | diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index e71685a7ffae..de4ebfc4e0e2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -30,8 +30,16 @@ #define Bit(_b) (1 << (_b)) -#define OCRDMA_GEN1_FAMILY 0xB -#define OCRDMA_GEN2_FAMILY 0x0F +enum { + OCRDMA_ASIC_GEN_SKH_R = 0x04, + OCRDMA_ASIC_GEN_LANCER = 0x0B +}; + +enum { + OCRDMA_ASIC_REV_A0 = 0x00, + OCRDMA_ASIC_REV_B0 = 0x10, + OCRDMA_ASIC_REV_C0 = 0x20 +}; #define OCRDMA_SUBSYS_ROCE 10 enum { @@ -141,6 +149,11 @@ enum { #define OCRDMA_MIN_Q_PAGE_SIZE (4096) #define OCRDMA_MAX_Q_PAGES (8) +#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C +#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF +#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 +#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 + /* # 0: 4K Bytes # 1: 8K Bytes diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index be75d2b1ea9f..c5b40583f1ae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -267,7 +267,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (udata && uctx) { pd->dpp_enabled = - dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY; + ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; } @@ -1161,7 +1161,7 @@ err: static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp, struct ocrdma_pd *pd) { - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { qp->sq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + OCRDMA_DB_GEN2_SQ_OFFSET; @@ -1688,7 +1688,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, (srq->pd->id * dev->nic_info.db_page_size); uresp.db_page_size = dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; uresp.db_shift = 24; } else { -- cgit v1.2.3 From 1eebbb6ec3eb760f77240af7e850e93222e89afe Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:57 +0530 Subject: RDMA/ocrdma: Allow DPP QP creation Allow creating DPP QP even if inline-data is not requested. This is an optimization to lower latency. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ec310d258553..6eedd2d56d85 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2026,8 +2026,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; qp->rq_cq = cq; - if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp && - (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) { + if (pd->dpp_enabled && pd->num_dpp_qp) { ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, dpp_cq_id); } -- cgit v1.2.3 From b6b87d2e6930b6dbb926998d3250c61cef1ec562 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:58 +0530 Subject: RDMA/ocrdma: ABI versioning between ocrdma and be2net While loading RoCE driver be2net driver should check for ABI version to catch functional incompatibilities. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index fbac8eb44036..2a14d4a36739 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -29,6 +29,7 @@ #define __OCRDMA_ABI_H__ #define OCRDMA_ABI_VERSION 1 +#define OCRDMA_BE_ROCE_ABI_VERSION 1 /* user kernel communication data structures. */ struct ocrdma_alloc_ucontext_resp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b9e147816703..fc9d71edfb4e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -478,6 +478,7 @@ static struct ocrdma_driver ocrdma_drv = { .add = ocrdma_add, .remove = ocrdma_remove, .state_change_handler = ocrdma_event_handler, + .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION, }; static void ocrdma_unregister_inet6addr_notifier(void) -- cgit v1.2.3 From 2407116ef90f684246e208275e6883e0706be432 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:59 +0530 Subject: be2net: Add abi version between be2net and ocrdma This patch adds abi versioning between be2net and ocrdma driver modules to catch functional incompatibilities in the two drivers. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/net/ethernet/emulex/benet/be_roce.c | 6 ++++++ drivers/net/ethernet/emulex/benet/be_roce.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 9cd5415fe017..aa7f94375108 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -35,6 +35,12 @@ static void _be_roce_dev_add(struct be_adapter *adapter) if (!ocrdma_drv) return; + + if (ocrdma_drv->be_abi_version != BE_ROCE_ABI_VERSION) { + dev_warn(&pdev->dev, "Cannot initialize RoCE due to ocrdma ABI mismatch\n"); + return; + } + if (pdev->device == OC_DEVICE_ID5) { /* only msix is supported on these devices */ if (!msix_enabled(adapter)) diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h index 2cd1129e19af..1bfb16164df8 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.h +++ b/drivers/net/ethernet/emulex/benet/be_roce.h @@ -21,6 +21,8 @@ #include #include +#define BE_ROCE_ABI_VERSION 1 + struct ocrdma_dev; enum be_interrupt_mode { @@ -52,6 +54,7 @@ struct be_dev_info { /* ocrdma driver register's the callback functions with nic driver. */ struct ocrdma_driver { unsigned char name[32]; + u32 be_abi_version; struct ocrdma_dev *(*add) (struct be_dev_info *dev_info); void (*remove) (struct ocrdma_dev *); void (*state_change_handler) (struct ocrdma_dev *, u32 new_state); -- cgit v1.2.3 From 0154410bd456affe21f31ab27278847461316e0b Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:57:00 +0530 Subject: RDMA/ocrdma: Update version string Update the driver vrsion string and node description string Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 4 +++- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 24fe248fb35a..00b3a296777f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -39,7 +39,9 @@ #include #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DEV_VERSION "1.0.0" +#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" + +#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" #define OCRDMA_MAX_AH 512 diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index fc9d71edfb4e..ae17a3670bcd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -41,8 +41,8 @@ #include "ocrdma_hw.h" #include "ocrdma_abi.h" -MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION); -MODULE_DESCRIPTION("Emulex RoCE HCA Driver"); +MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); +MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 2e6e9f2bb894e902b7fbd3f6865719eaa8306b6c Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:57:01 +0530 Subject: RDMA/ocrdma: Increment abi version count Increment the ABI version count for driver/library interface. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index 2a14d4a36739..5a82ce535181 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -28,7 +28,7 @@ #ifndef __OCRDMA_ABI_H__ #define __OCRDMA_ABI_H__ -#define OCRDMA_ABI_VERSION 1 +#define OCRDMA_ABI_VERSION 2 #define OCRDMA_BE_ROCE_ABI_VERSION 1 /* user kernel communication data structures. */ -- cgit v1.2.3 From 9d1878a369b23f48a5ca5bcbd89abb0e569c58cc Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:02 +0530 Subject: RDMA/ocrdma: Memory leak fix in ocrdma_dereg_mr() Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c5b40583f1ae..8cc00d2a06af 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -840,8 +840,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); - if (mr->hwmr.fr_mr == 0) - ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); /* it could be user registered memory. */ if (mr->umem) -- cgit v1.2.3 From cf5788ade718a2cc654170ff11c7d6f6f1ecbdcc Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:03 +0530 Subject: RDMA/ocrdma: Use non-zero tag in SRQ posting As part of SRQ receive buffers posting we populate a non-zero tag which will be returned in SRQ receive completions. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 8cc00d2a06af..2b56c4288680 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1538,7 +1538,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) int discard_cnt = 0; u32 cur_getp, stop_getp; struct ocrdma_cqe *cqe; - u32 qpn = 0; + u32 qpn = 0, wqe_idx = 0; spin_lock_irqsave(&cq->cq_lock, cq_flags); @@ -1567,24 +1567,29 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) if (qpn == 0 || qpn != qp->id) goto skip_cqe; - /* mark cqe discarded so that it is not picked up later - * in the poll_cq(). - */ - discard_cnt += 1; - cqe->cmn.qpn = 0; if (is_cqe_for_sq(cqe)) { ocrdma_hwq_inc_tail(&qp->sq); } else { if (qp->srq) { + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & + qp->srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); spin_lock_irqsave(&qp->srq->q_lock, flags); ocrdma_hwq_inc_tail(&qp->srq->rq); - ocrdma_srq_toggle_bit(qp->srq, cur_getp); + ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1); spin_unlock_irqrestore(&qp->srq->q_lock, flags); } else { ocrdma_hwq_inc_tail(&qp->rq); } } + /* mark cqe discarded so that it is not picked up later + * in the poll_cq(). + */ + discard_cnt += 1; + cqe->cmn.qpn = 0; skip_cqe: cur_getp = (cur_getp + 1) % cq->max_hw_cqe; } while (cur_getp != stop_getp); @@ -2238,7 +2243,7 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq) if (row == srq->bit_fields_len) BUG(); - return indx; + return indx + 1; /* Use from index 1 */ } static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) @@ -2575,10 +2580,13 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc, srq = get_ocrdma_srq(qp->ibqp.srq); wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> - OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); + ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx]; spin_lock_irqsave(&srq->q_lock, flags); - ocrdma_srq_toggle_bit(srq, wqe_idx); + ocrdma_srq_toggle_bit(srq, wqe_idx - 1); spin_unlock_irqrestore(&srq->q_lock, flags); ocrdma_hwq_inc_tail(&srq->rq); } -- cgit v1.2.3 From ac578aef8b9f6095016a12390f0ff638a3a54988 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:04 +0530 Subject: RDMA/ocrdma: Display proper value for max_mw Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 1 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 00b3a296777f..4ea8496c7107 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -67,6 +67,7 @@ struct ocrdma_dev_attr { int max_mr; u64 max_mr_size; u32 max_num_mr_pbl; + int max_mw; int max_fmr; int max_map_per_fmr; int max_pages_per_frmr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6eedd2d56d85..f1ec9d9a7224 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1016,6 +1016,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >> OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; + attr->max_mw = rsp->max_mw; attr->max_mr = rsp->max_mr; attr->max_mr_size = ~0ull; attr->max_fmr = 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2b56c4288680..95eeeeb1251b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -89,7 +89,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; - attr->max_mw = 0; + attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; attr->max_fmr = 0; -- cgit v1.2.3 From 1228056bcff2e4029c3d9d5d31cf122eea3a4c6a Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:05 +0530 Subject: RDMA/ocrdma: Handle CQ overrun error Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index f1ec9d9a7224..ebcb26079daf 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -640,7 +640,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, { struct ocrdma_qp *qp = NULL; struct ocrdma_cq *cq = NULL; - struct ib_event ib_evt; + struct ib_event ib_evt = { 0 }; int cq_event = 0; int qp_event = 1; int srq_event = 0; @@ -665,6 +665,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, case OCRDMA_CQ_OVERRUN_ERROR: ib_evt.element.cq = &cq->ibcq; ib_evt.event = IB_EVENT_CQ_ERR; + cq_event = 1; + qp_event = 0; break; case OCRDMA_CQ_QPCAT_ERROR: ib_evt.element.qp = &qp->ibqp; @@ -726,6 +728,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, qp->srq->ibsrq. srq_context); } else if (dev_event) { + pr_err("%s: Fatal event received\n", dev->ibdev.name); ib_dispatch_event(&ib_evt); } -- cgit v1.2.3 From bbc5ec524eecf8af95b81c3c1d15cbc672568b4e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:06 +0530 Subject: RDMA/ocrdma: Support non-embedded mailbox commands Added a routine to issue non-embedded mailbox commands for handling large mailbox request/response data. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 55 +++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ebcb26079daf..e6797ff4c99b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -953,7 +953,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) { int status = 0; u16 cqe_status, ext_status; - struct ocrdma_mqe *rsp; + struct ocrdma_mqe *rsp_mqe; + struct ocrdma_mbx_rsp *rsp = NULL; mutex_lock(&dev->mqe_ctx.lock); ocrdma_post_mqe(dev, mqe); @@ -962,23 +963,61 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) goto mbx_err; cqe_status = dev->mqe_ctx.cqe_status; ext_status = dev->mqe_ctx.ext_status; - rsp = ocrdma_get_mqe_rsp(dev); - ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); + rsp_mqe = ocrdma_get_mqe_rsp(dev); + ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe))); + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + rsp = &mqe->u.rsp; + if (cqe_status || ext_status) { - pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", - __func__, - (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> - OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status); + pr_err("%s() cqe_status=0x%x, ext_status=0x%x,", + __func__, cqe_status, ext_status); + if (rsp) { + /* This is for embedded cmds. */ + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + } status = ocrdma_get_mbx_cqe_errno(cqe_status); goto mbx_err; } - if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK) + /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */ + if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)) status = ocrdma_get_mbx_errno(mqe->u.rsp.status); mbx_err: mutex_unlock(&dev->mqe_ctx.lock); return status; } +static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, + void *payload_va) +{ + int status = 0; + struct ocrdma_mbx_rsp *rsp = payload_va; + + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + BUG(); + + status = ocrdma_mbx_cmd(dev, mqe); + if (!status) + /* For non embedded, only CQE failures are handled in + * ocrdma_mbx_cmd. We need to check for RSP errors. + */ + if (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK) + status = ocrdma_get_mbx_errno(rsp->status); + + if (status) + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + return status; +} + static void ocrdma_get_attr(struct ocrdma_dev *dev, struct ocrdma_dev_attr *attr, struct ocrdma_mbx_query_config *rsp) -- cgit v1.2.3 From a51f06e1679e2abac2e8a817884e60edc18c5c86 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:07 +0530 Subject: RDMA/ocrdma: Query controller information Issue mailbox commands to query ocrdma controller information and phy information and print them while adding ocrdma device. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/Makefile | 2 +- drivers/infiniband/hw/ocrdma/ocrdma.h | 61 +++ drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 148 +++++++ drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 2 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 14 + drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 233 ++++++++++- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 623 ++++++++++++++++++++++++++++ drivers/infiniband/hw/ocrdma/ocrdma_stats.h | 54 +++ 8 files changed, 1135 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/hw/ocrdma/ocrdma_stats.c create mode 100644 drivers/infiniband/hw/ocrdma/ocrdma_stats.h diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile index 06a5bed12e43..d1bfd4f4cdde 100644 --- a/drivers/infiniband/hw/ocrdma/Makefile +++ b/drivers/infiniband/hw/ocrdma/Makefile @@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/emulex/benet obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o -ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o +ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 4ea8496c7107..3042c87a74ba 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -44,10 +44,17 @@ #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" +#define OC_NAME_SH OCRDMA_NODE_DESC "(Skyhawk)" +#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)" + +#define OC_SKH_DEVICE_PF 0x720 +#define OC_SKH_DEVICE_VF 0x728 #define OCRDMA_MAX_AH 512 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) +#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo) + struct ocrdma_dev_attr { u8 fw_ver[32]; u32 vendor_id; @@ -86,6 +93,12 @@ struct ocrdma_dev_attr { u8 num_ird_pages; }; +struct ocrdma_dma_mem { + void *va; + dma_addr_t pa; + u32 size; +}; + struct ocrdma_pbl { void *va; dma_addr_t pa; @@ -151,6 +164,26 @@ struct ocrdma_mr { struct ocrdma_hw_mr hwmr; }; +struct ocrdma_stats { + u8 type; + struct ocrdma_dev *dev; +}; + +struct stats_mem { + struct ocrdma_mqe mqe; + void *va; + dma_addr_t pa; + u32 size; + char *debugfs_mem; +}; + +struct phy_info { + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u16 phy_type; + u16 interface_type; +}; + struct ocrdma_dev { struct ib_device ibdev; struct ocrdma_dev_attr attr; @@ -194,6 +227,9 @@ struct ocrdma_dev { struct mqe_ctx mqe_ctx; struct be_dev_info nic_info; + struct phy_info phy; + char model_number[32]; + u32 hba_port_num; struct list_head entry; struct rcu_head rcu; @@ -201,6 +237,20 @@ struct ocrdma_dev { struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG]; u16 pvid; u32 asic_id; + + ulong last_stats_time; + struct mutex stats_lock; /* provide synch for debugfs operations */ + struct stats_mem stats_mem; + struct ocrdma_stats rsrc_stats; + struct ocrdma_stats rx_stats; + struct ocrdma_stats wqe_stats; + struct ocrdma_stats tx_stats; + struct ocrdma_stats db_err_stats; + struct ocrdma_stats tx_qp_err_stats; + struct ocrdma_stats rx_qp_err_stats; + struct ocrdma_stats tx_dbg_stats; + struct ocrdma_stats rx_dbg_stats; + struct dentry *dir; }; struct ocrdma_cq { @@ -434,6 +484,17 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, return 0; } +static inline char *hca_name(struct ocrdma_dev *dev) +{ + switch (dev->nic_info.pdev->device) { + case OC_SKH_DEVICE_PF: + case OC_SKH_DEVICE_VF: + return OC_NAME_SH; + default: + return OC_NAME_UNKNOWN; + } +} + static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, int eqid) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e6797ff4c99b..5d3485846d87 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -243,6 +243,23 @@ static int ocrdma_get_mbx_errno(u32 status) return err_num; } +char *port_speed_string(struct ocrdma_dev *dev) +{ + char *str = ""; + u16 speeds_supported; + + speeds_supported = dev->phy.fixed_speeds_supported | + dev->phy.auto_speeds_supported; + if (speeds_supported & OCRDMA_PHY_SPEED_40GBPS) + str = "40Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_10GBPS) + str = "10Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_1GBPS) + str = "1Gbps "; + + return str; +} + static int ocrdma_get_mbx_cqe_errno(u16 cqe_status) { int err_num = -EINVAL; @@ -332,6 +349,11 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len) return mqe; } +static void *ocrdma_alloc_mqe(void) +{ + return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); +} + static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q) { dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma); @@ -1154,6 +1176,96 @@ mbx_err: return status; } +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset) +{ + struct ocrdma_rdma_stats_req *req = dev->stats_mem.va; + struct ocrdma_mqe *mqe = &dev->stats_mem.mqe; + struct ocrdma_rdma_stats_resp *old_stats = NULL; + int status; + + old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL); + if (old_stats == NULL) + return -ENOMEM; + + memset(mqe, 0, sizeof(*mqe)); + mqe->hdr.pyld_len = dev->stats_mem.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff); + mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa); + mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size; + + /* Cache the old stats */ + memcpy(old_stats, req, sizeof(struct ocrdma_rdma_stats_resp)); + memset(req, 0, dev->stats_mem.size); + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)req, + OCRDMA_CMD_GET_RDMA_STATS, + OCRDMA_SUBSYS_ROCE, + dev->stats_mem.size); + if (reset) + req->reset_stats = reset; + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va); + if (status) + /* Copy from cache, if mbox fails */ + memcpy(req, old_stats, sizeof(struct ocrdma_rdma_stats_resp)); + else + ocrdma_le32_to_cpu(req, dev->stats_mem.size); + + kfree(old_stats); + return status; +} + +static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_dma_mem dma; + struct ocrdma_mqe *mqe; + struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp; + struct mgmt_hba_attribs *hba_attribs; + + mqe = ocrdma_alloc_mqe(); + if (!mqe) + return status; + memset(mqe, 0, sizeof(*mqe)); + + dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp); + dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev, + dma.size, &dma.pa, GFP_KERNEL); + if (!dma.va) + goto free_mqe; + + mqe->hdr.pyld_len = dma.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff); + mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa); + mqe->u.nonemb_req.sge[0].len = dma.size; + + memset(dma.va, 0, dma.size); + ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES, + OCRDMA_SUBSYS_COMMON, + dma.size); + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va); + if (!status) { + ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; + hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; + + dev->hba_port_num = hba_attribs->phy_port; + strncpy(dev->model_number, + hba_attribs->controller_model_number, 31); + } + dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa); +free_mqe: + kfree(mqe); + return status; +} + static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev) { int status = -ENOMEM; @@ -1201,6 +1313,35 @@ mbx_err: return status; } +static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_mqe *cmd; + struct ocrdma_get_phy_info_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd)); + if (!cmd) + return status; + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON, + sizeof(*cmd)); + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_phy_info_rsp *)cmd; + dev->phy.phy_type = le16_to_cpu(rsp->phy_type); + dev->phy.auto_speeds_supported = + le16_to_cpu(rsp->auto_speeds_supported); + dev->phy.fixed_speeds_supported = + le16_to_cpu(rsp->fixed_speeds_supported); +mbx_err: + kfree(cmd); + return status; +} + int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { int status = -ENOMEM; @@ -2570,6 +2711,13 @@ int ocrdma_init_hw(struct ocrdma_dev *dev) status = ocrdma_mbx_create_ah_tbl(dev); if (status) goto conf_err; + status = ocrdma_mbx_get_phy_info(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_get_ctrl_attribs(dev); + if (status) + goto conf_err; + return 0; conf_err: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 77da536d5d96..e513f7293142 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -133,4 +133,6 @@ bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); void ocrdma_flush_qp(struct ocrdma_qp *); int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); +char *port_speed_string(struct ocrdma_dev *dev); #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index ae17a3670bcd..7d18b3ad21e7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -39,6 +39,7 @@ #include "ocrdma_ah.h" #include "be_roce.h" #include "ocrdma_hw.h" +#include "ocrdma_stats.h" #include "ocrdma_abi.h" MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); @@ -372,6 +373,15 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) spin_lock(&ocrdma_devlist_lock); list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); spin_unlock(&ocrdma_devlist_lock); + /* Init stats */ + ocrdma_add_port_stats(dev); + + pr_info("%s %s: %s \"%s\" port %d\n", + dev_name(&dev->nic_info.pdev->dev), hca_name(dev), + port_speed_string(dev), dev->model_number, + dev->hba_port_num); + pr_info("%s ocrdma%d driver loaded successfully\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); return dev; alloc_err: @@ -400,6 +410,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev) /* first unregister with stack to stop all the active traffic * of the registered clients. */ + ocrdma_rem_port_stats(dev); ib_unregister_device(&dev->ibdev); spin_lock(&ocrdma_devlist_lock); @@ -492,6 +503,8 @@ static int __init ocrdma_init_module(void) { int status; + ocrdma_init_debugfs(); + status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier); if (status) return status; @@ -513,6 +526,7 @@ static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); ocrdma_unregister_inet6addr_notifier(); + ocrdma_rem_debugfs(); } module_init(ocrdma_init_module); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index de4ebfc4e0e2..6e048b7283c4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -72,6 +72,7 @@ enum { OCRDMA_CMD_ATTACH_MCAST, OCRDMA_CMD_DETACH_MCAST, + OCRDMA_CMD_GET_RDMA_STATS, OCRDMA_CMD_MAX }; @@ -82,12 +83,14 @@ enum { OCRDMA_CMD_CREATE_CQ = 12, OCRDMA_CMD_CREATE_EQ = 13, OCRDMA_CMD_CREATE_MQ = 21, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES = 32, OCRDMA_CMD_GET_FW_VER = 35, OCRDMA_CMD_DELETE_MQ = 53, OCRDMA_CMD_DELETE_CQ = 54, OCRDMA_CMD_DELETE_EQ = 55, OCRDMA_CMD_GET_FW_CONFIG = 58, - OCRDMA_CMD_CREATE_MQ_EXT = 90 + OCRDMA_CMD_CREATE_MQ_EXT = 90, + OCRDMA_CMD_PHY_DETAILS = 102 }; enum { @@ -578,6 +581,30 @@ enum { OCRDMA_FN_MODE_RDMA = 0x4 }; +struct ocrdma_get_phy_info_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u16 phy_type; + u16 interface_type; + u32 misc_params; + u16 ext_phy_details; + u16 rsvd; + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u32 future_use[2]; +}; + +enum { + OCRDMA_PHY_SPEED_ZERO = 0x0, + OCRDMA_PHY_SPEED_10MBPS = 0x1, + OCRDMA_PHY_SPEED_100MBPS = 0x2, + OCRDMA_PHY_SPEED_1GBPS = 0x4, + OCRDMA_PHY_SPEED_10GBPS = 0x8, + OCRDMA_PHY_SPEED_40GBPS = 0x20 +}; + + struct ocrdma_get_link_speed_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; @@ -1719,4 +1746,208 @@ struct ocrdma_av { u32 valid; } __packed; +struct ocrdma_rsrc_stats { + u32 dpp_pds; + u32 non_dpp_pds; + u32 rc_dpp_qps; + u32 uc_dpp_qps; + u32 ud_dpp_qps; + u32 rc_non_dpp_qps; + u32 rsvd; + u32 uc_non_dpp_qps; + u32 ud_non_dpp_qps; + u32 rsvd1; + u32 srqs; + u32 rbqs; + u32 r64K_nsmr; + u32 r64K_to_2M_nsmr; + u32 r2M_to_44M_nsmr; + u32 r44M_to_1G_nsmr; + u32 r1G_to_4G_nsmr; + u32 nsmr_count_4G_to_32G; + u32 r32G_to_64G_nsmr; + u32 r64G_to_128G_nsmr; + u32 r128G_to_higher_nsmr; + u32 embedded_nsmr; + u32 frmr; + u32 prefetch_qps; + u32 ondemand_qps; + u32 phy_mr; + u32 mw; + u32 rsvd2[7]; +}; + +struct ocrdma_db_err_stats { + u32 sq_doorbell_errors; + u32 cq_doorbell_errors; + u32 rq_srq_doorbell_errors; + u32 cq_overflow_errors; + u32 rsvd[4]; +}; + +struct ocrdma_wqe_stats { + u32 large_send_rc_wqes_lo; + u32 large_send_rc_wqes_hi; + u32 large_write_rc_wqes_lo; + u32 large_write_rc_wqes_hi; + u32 rsvd[4]; + u32 read_wqes_lo; + u32 read_wqes_hi; + u32 frmr_wqes_lo; + u32 frmr_wqes_hi; + u32 mw_bind_wqes_lo; + u32 mw_bind_wqes_hi; + u32 invalidate_wqes_lo; + u32 invalidate_wqes_hi; + u32 rsvd1[2]; + u32 dpp_wqe_drops; + u32 rsvd2[5]; +}; + +struct ocrdma_tx_stats { + u32 send_pkts_lo; + u32 send_pkts_hi; + u32 write_pkts_lo; + u32 write_pkts_hi; + u32 read_pkts_lo; + u32 read_pkts_hi; + u32 read_rsp_pkts_lo; + u32 read_rsp_pkts_hi; + u32 ack_pkts_lo; + u32 ack_pkts_hi; + u32 send_bytes_lo; + u32 send_bytes_hi; + u32 write_bytes_lo; + u32 write_bytes_hi; + u32 read_req_bytes_lo; + u32 read_req_bytes_hi; + u32 read_rsp_bytes_lo; + u32 read_rsp_bytes_hi; + u32 ack_timeouts; + u32 rsvd[5]; +}; + + +struct ocrdma_tx_qp_err_stats { + u32 local_length_errors; + u32 local_protection_errors; + u32 local_qp_operation_errors; + u32 retry_count_exceeded_errors; + u32 rnr_retry_count_exceeded_errors; + u32 rsvd[3]; +}; + +struct ocrdma_rx_stats { + u32 roce_frame_bytes_lo; + u32 roce_frame_bytes_hi; + u32 roce_frame_icrc_drops; + u32 roce_frame_payload_len_drops; + u32 ud_drops; + u32 qp1_drops; + u32 psn_error_request_packets; + u32 psn_error_resp_packets; + u32 rnr_nak_timeouts; + u32 rnr_nak_receives; + u32 roce_frame_rxmt_drops; + u32 nak_count_psn_sequence_errors; + u32 rc_drop_count_lookup_errors; + u32 rq_rnr_naks; + u32 srq_rnr_naks; + u32 roce_frames_lo; + u32 roce_frames_hi; + u32 rsvd; +}; + +struct ocrdma_rx_qp_err_stats { + u32 nak_invalid_requst_errors; + u32 nak_remote_operation_errors; + u32 nak_count_remote_access_errors; + u32 local_length_errors; + u32 local_protection_errors; + u32 local_qp_operation_errors; + u32 rsvd[2]; +}; + +struct ocrdma_tx_dbg_stats { + u32 data[100]; +}; + +struct ocrdma_rx_dbg_stats { + u32 data[200]; +}; + +struct ocrdma_rdma_stats_req { + struct ocrdma_mbx_hdr hdr; + u8 reset_stats; + u8 rsvd[3]; +} __packed; + +struct ocrdma_rdma_stats_resp { + struct ocrdma_mbx_hdr hdr; + struct ocrdma_rsrc_stats act_rsrc_stats; + struct ocrdma_rsrc_stats th_rsrc_stats; + struct ocrdma_db_err_stats db_err_stats; + struct ocrdma_wqe_stats wqe_stats; + struct ocrdma_tx_stats tx_stats; + struct ocrdma_tx_qp_err_stats tx_qp_err_stats; + struct ocrdma_rx_stats rx_stats; + struct ocrdma_rx_qp_err_stats rx_qp_err_stats; + struct ocrdma_tx_dbg_stats tx_dbg_stats; + struct ocrdma_rx_dbg_stats rx_dbg_stats; +} __packed; + + +struct mgmt_hba_attribs { + u8 flashrom_version_string[32]; + u8 manufacturer_name[32]; + u32 supported_modes; + u32 rsvd0[3]; + u8 ncsi_ver_string[12]; + u32 default_extended_timeout; + u8 controller_model_number[32]; + u8 controller_description[64]; + u8 controller_serial_number[32]; + u8 ip_version_string[32]; + u8 firmware_version_string[32]; + u8 bios_version_string[32]; + u8 redboot_version_string[32]; + u8 driver_version_string[32]; + u8 fw_on_flash_version_string[32]; + u32 functionalities_supported; + u16 max_cdblength; + u8 asic_revision; + u8 generational_guid[16]; + u8 hba_port_count; + u16 default_link_down_timeout; + u8 iscsi_ver_min_max; + u8 multifunction_device; + u8 cache_valid; + u8 hba_status; + u8 max_domains_supported; + u8 phy_port; + u32 firmware_post_status; + u32 hba_mtu[8]; + u32 rsvd1[4]; +}; + +struct mgmt_controller_attrib { + struct mgmt_hba_attribs hba_attribs; + u16 pci_vendor_id; + u16 pci_device_id; + u16 pci_sub_vendor_id; + u16 pci_sub_system_id; + u8 pci_bus_number; + u8 pci_device_number; + u8 pci_function_number; + u8 interface_type; + u64 unique_identifier; + u32 rsvd0[5]; +}; + +struct ocrdma_get_ctrl_attribs_rsp { + struct ocrdma_mbx_hdr hdr; + struct mgmt_controller_attrib ctrl_attribs; +}; + + #endif /* __OCRDMA_SLI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c new file mode 100644 index 000000000000..6c54106f5e64 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -0,0 +1,623 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include +#include "ocrdma_stats.h" + +static struct dentry *ocrdma_dbgfs_dir; + +static int ocrdma_add_stat(char *start, char *pcur, + char *name, u64 count) +{ + char buff[128] = {0}; + int cpy_len = 0; + + snprintf(buff, 128, "%s: %llu\n", name, count); + cpy_len = strlen(buff); + + if (pcur + cpy_len > start + OCRDMA_MAX_DBGFS_MEM) { + pr_err("%s: No space in stats buff\n", __func__); + return 0; + } + + memcpy(pcur, buff, cpy_len); + return cpy_len; +} + +static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + /* Alloc mbox command mem*/ + mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req), + sizeof(struct ocrdma_rdma_stats_resp)); + + mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size, + &mem->pa, GFP_KERNEL); + if (!mem->va) { + pr_err("%s: stats mbox allocation failed\n", __func__); + return false; + } + + memset(mem->va, 0, mem->size); + + /* Alloc debugfs mem */ + mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL); + if (!mem->debugfs_mem) { + pr_err("%s: stats debugfs mem allocation failed\n", __func__); + return false; + } + + return true; +} + +static void ocrdma_release_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + if (mem->va) + dma_free_coherent(&dev->nic_info.pdev->dev, mem->size, + mem->va, mem->pa); + kfree(mem->debugfs_mem); +} + +static char *ocrdma_resource_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "active_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_srqs", + (u64)rsrc_stats->srqs); + pcur += ocrdma_add_stat(stats, pcur, "active_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "active_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "active_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "active_mw", + (u64)rsrc_stats->mw); + + /* Print the threshold stats */ + rsrc_stats = &rdma_stats->th_rsrc_stats; + + pcur += ocrdma_add_stat(stats, pcur, "threshold_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_srqs", + (u64)rsrc_stats->srqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "threshold_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_mw", + (u64)rsrc_stats->mw); + return stats; +} + +static char *ocrdma_rx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_stats *rx_stats = &rdma_stats->rx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat + (stats, pcur, "roce_frame_bytes", + convert_to_64bit(rx_stats->roce_frame_bytes_lo, + rx_stats->roce_frame_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_icrc_drops", + (u64)rx_stats->roce_frame_icrc_drops); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_payload_len_drops", + (u64)rx_stats->roce_frame_payload_len_drops); + pcur += ocrdma_add_stat(stats, pcur, "ud_drops", + (u64)rx_stats->ud_drops); + pcur += ocrdma_add_stat(stats, pcur, "qp1_drops", + (u64)rx_stats->qp1_drops); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_request_packets", + (u64)rx_stats->psn_error_request_packets); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_resp_packets", + (u64)rx_stats->psn_error_resp_packets); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_timeouts", + (u64)rx_stats->rnr_nak_timeouts); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_receives", + (u64)rx_stats->rnr_nak_receives); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_rxmt_drops", + (u64)rx_stats->roce_frame_rxmt_drops); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_psn_sequence_errors", + (u64)rx_stats->nak_count_psn_sequence_errors); + pcur += ocrdma_add_stat(stats, pcur, "rc_drop_count_lookup_errors", + (u64)rx_stats->rc_drop_count_lookup_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_rnr_naks", + (u64)rx_stats->rq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "srq_rnr_naks", + (u64)rx_stats->srq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "roce_frames", + convert_to_64bit(rx_stats->roce_frames_lo, + rx_stats->roce_frames_hi)); + + return stats; +} + +static char *ocrdma_tx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_stats *tx_stats = &rdma_stats->tx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "send_pkts", + convert_to_64bit(tx_stats->send_pkts_lo, + tx_stats->send_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "write_pkts", + convert_to_64bit(tx_stats->write_pkts_lo, + tx_stats->write_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_pkts", + convert_to_64bit(tx_stats->read_pkts_lo, + tx_stats->read_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_pkts", + convert_to_64bit(tx_stats->read_rsp_pkts_lo, + tx_stats->read_rsp_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_pkts", + convert_to_64bit(tx_stats->ack_pkts_lo, + tx_stats->ack_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "send_bytes", + convert_to_64bit(tx_stats->send_bytes_lo, + tx_stats->send_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "write_bytes", + convert_to_64bit(tx_stats->write_bytes_lo, + tx_stats->write_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_req_bytes", + convert_to_64bit(tx_stats->read_req_bytes_lo, + tx_stats->read_req_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_bytes", + convert_to_64bit(tx_stats->read_rsp_bytes_lo, + tx_stats->read_rsp_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_timeouts", + (u64)tx_stats->ack_timeouts); + + return stats; +} + +static char *ocrdma_wqe_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_wqe_stats *wqe_stats = &rdma_stats->wqe_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "large_send_rc_wqes", + convert_to_64bit(wqe_stats->large_send_rc_wqes_lo, + wqe_stats->large_send_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "large_write_rc_wqes", + convert_to_64bit(wqe_stats->large_write_rc_wqes_lo, + wqe_stats->large_write_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_wqes", + convert_to_64bit(wqe_stats->read_wqes_lo, + wqe_stats->read_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "frmr_wqes", + convert_to_64bit(wqe_stats->frmr_wqes_lo, + wqe_stats->frmr_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "mw_bind_wqes", + convert_to_64bit(wqe_stats->mw_bind_wqes_lo, + wqe_stats->mw_bind_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "invalidate_wqes", + convert_to_64bit(wqe_stats->invalidate_wqes_lo, + wqe_stats->invalidate_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "dpp_wqe_drops", + (u64)wqe_stats->dpp_wqe_drops); + return stats; +} + +static char *ocrdma_db_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_db_err_stats *db_err_stats = &rdma_stats->db_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "sq_doorbell_errors", + (u64)db_err_stats->sq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_doorbell_errors", + (u64)db_err_stats->cq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_srq_doorbell_errors", + (u64)db_err_stats->rq_srq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_overflow_errors", + (u64)db_err_stats->cq_overflow_errors); + return stats; +} + +static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_qp_err_stats *rx_qp_err_stats = + &rdma_stats->rx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors", + (u64)rx_qp_err_stats->nak_invalid_requst_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", + (u64)rx_qp_err_stats->nak_remote_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", + (u64)rx_qp_err_stats->nak_count_remote_access_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)rx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)rx_qp_err_stats->local_protection_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)rx_qp_err_stats->local_qp_operation_errors); + return stats; +} + +static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_qp_err_stats *tx_qp_err_stats = + &rdma_stats->tx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)tx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)tx_qp_err_stats->local_protection_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)tx_qp_err_stats->local_qp_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "retry_count_exceeded_errors", + (u64)tx_qp_err_stats->retry_count_exceeded_errors); + pcur += ocrdma_add_stat(stats, pcur, "rnr_retry_count_exceeded_errors", + (u64)tx_qp_err_stats->rnr_retry_count_exceeded_errors); + return stats; +} + +static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_dbg_stats *tx_dbg_stats = + &rdma_stats->tx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 100; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + tx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_dbg_stats *rx_dbg_stats = + &rdma_stats->rx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 200; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + rx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static void ocrdma_update_stats(struct ocrdma_dev *dev) +{ + ulong now = jiffies, secs; + int status = 0; + + secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U; + if (secs) { + /* update */ + status = ocrdma_mbx_rdma_stats(dev, false); + if (status) + pr_err("%s: stats mbox failed with status = %d\n", + __func__, status); + dev->last_stats_time = jiffies; + } +} + +static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, + size_t usr_buf_len, loff_t *ppos) +{ + struct ocrdma_stats *pstats = filp->private_data; + struct ocrdma_dev *dev = pstats->dev; + ssize_t status = 0; + char *data = NULL; + + /* No partial reads */ + if (*ppos != 0) + return 0; + + mutex_lock(&dev->stats_lock); + + ocrdma_update_stats(dev); + + switch (pstats->type) { + case OCRDMA_RSRC_STATS: + data = ocrdma_resource_stats(dev); + break; + case OCRDMA_RXSTATS: + data = ocrdma_rx_stats(dev); + break; + case OCRDMA_WQESTATS: + data = ocrdma_wqe_stats(dev); + break; + case OCRDMA_TXSTATS: + data = ocrdma_tx_stats(dev); + break; + case OCRDMA_DB_ERRSTATS: + data = ocrdma_db_errstats(dev); + break; + case OCRDMA_RXQP_ERRSTATS: + data = ocrdma_rxqp_errstats(dev); + break; + case OCRDMA_TXQP_ERRSTATS: + data = ocrdma_txqp_errstats(dev); + break; + case OCRDMA_TX_DBG_STATS: + data = ocrdma_tx_dbg_stats(dev); + break; + case OCRDMA_RX_DBG_STATS: + data = ocrdma_rx_dbg_stats(dev); + break; + + default: + status = -EFAULT; + goto exit; + } + + if (usr_buf_len < strlen(data)) { + status = -ENOSPC; + goto exit; + } + + status = simple_read_from_buffer(buffer, usr_buf_len, ppos, data, + strlen(data)); +exit: + mutex_unlock(&dev->stats_lock); + return status; +} + +static int ocrdma_debugfs_open(struct inode *inode, struct file *file) +{ + if (inode->i_private) + file->private_data = inode->i_private; + return 0; +} + +static const struct file_operations ocrdma_dbg_ops = { + .owner = THIS_MODULE, + .open = ocrdma_debugfs_open, + .read = ocrdma_dbgfs_ops_read, +}; + +void ocrdma_add_port_stats(struct ocrdma_dev *dev) +{ + if (!ocrdma_dbgfs_dir) + return; + + /* Create post stats base dir */ + dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir); + if (!dev->dir) + goto err; + + dev->rsrc_stats.type = OCRDMA_RSRC_STATS; + dev->rsrc_stats.dev = dev; + if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir, + &dev->rsrc_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_stats.type = OCRDMA_RXSTATS; + dev->rx_stats.dev = dev; + if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir, + &dev->rx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->wqe_stats.type = OCRDMA_WQESTATS; + dev->wqe_stats.dev = dev; + if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir, + &dev->wqe_stats, &ocrdma_dbg_ops)) + goto err; + + dev->tx_stats.type = OCRDMA_TXSTATS; + dev->tx_stats.dev = dev; + if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir, + &dev->tx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->db_err_stats.type = OCRDMA_DB_ERRSTATS; + dev->db_err_stats.dev = dev; + if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir, + &dev->db_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS; + dev->tx_qp_err_stats.dev = dev; + if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir, + &dev->tx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS; + dev->rx_qp_err_stats.dev = dev; + if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir, + &dev->rx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS; + dev->tx_dbg_stats.dev = dev; + if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir, + &dev->tx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS; + dev->rx_dbg_stats.dev = dev; + if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir, + &dev->rx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + /* Now create dma_mem for stats mbx command */ + if (!ocrdma_alloc_stats_mem(dev)) + goto err; + + mutex_init(&dev->stats_lock); + + return; +err: + ocrdma_release_stats_mem(dev); + debugfs_remove_recursive(dev->dir); + dev->dir = NULL; +} + +void ocrdma_rem_port_stats(struct ocrdma_dev *dev) +{ + if (!dev->dir) + return; + mutex_destroy(&dev->stats_lock); + ocrdma_release_stats_mem(dev); + debugfs_remove(dev->dir); +} + +void ocrdma_init_debugfs(void) +{ + /* Create base dir in debugfs root dir */ + ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL); +} + +void ocrdma_rem_debugfs(void) +{ + debugfs_remove_recursive(ocrdma_dbgfs_dir); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h new file mode 100644 index 000000000000..5f5e20c46d7c --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -0,0 +1,54 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_STATS_H__ +#define __OCRDMA_STATS_H__ + +#include +#include "ocrdma.h" +#include "ocrdma_hw.h" + +#define OCRDMA_MAX_DBGFS_MEM 4096 + +enum OCRDMA_STATS_TYPE { + OCRDMA_RSRC_STATS, + OCRDMA_RXSTATS, + OCRDMA_WQESTATS, + OCRDMA_TXSTATS, + OCRDMA_DB_ERRSTATS, + OCRDMA_RXQP_ERRSTATS, + OCRDMA_TXQP_ERRSTATS, + OCRDMA_TX_DBG_STATS, + OCRDMA_RX_DBG_STATS +}; + +void ocrdma_rem_debugfs(void); +void ocrdma_init_debugfs(void); +void ocrdma_rem_port_stats(struct ocrdma_dev *dev); +void ocrdma_add_port_stats(struct ocrdma_dev *dev); + +#endif /* __OCRDMA_STATS_H__ */ -- cgit v1.2.3 From 334b8db3a6c57bec78bcaacef3b1658bfc6e2efe Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:09 +0530 Subject: RDMA/ocrdma: Display FW version Adding a sysfs file for getting the FW version. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 42 +++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7d18b3ad21e7..61248b99f98f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -339,9 +339,42 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev) kfree(dev->sgid_tbl); } +/* OCRDMA sysfs interface */ +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); + +static struct device_attribute *ocrdma_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver +}; + +static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); +} + static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { - int status = 0; + int status = 0, i; struct ocrdma_dev *dev; dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); @@ -370,6 +403,9 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) + goto sysfs_err; spin_lock(&ocrdma_devlist_lock); list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); spin_unlock(&ocrdma_devlist_lock); @@ -384,6 +420,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) dev_name(&dev->nic_info.pdev->dev), dev->id); return dev; +sysfs_err: + ocrdma_remove_sysfiles(dev); alloc_err: ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); @@ -411,6 +449,8 @@ static void ocrdma_remove(struct ocrdma_dev *dev) * of the registered clients. */ ocrdma_rem_port_stats(dev); + ocrdma_remove_sysfiles(dev); + ib_unregister_device(&dev->ibdev); spin_lock(&ocrdma_devlist_lock); -- cgit v1.2.3 From fad51b7d361b4f1b439aa10fa258a35dac62b297 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:57:10 +0530 Subject: RDMA/ocrdma: Code clean-up Clean up code. Also modifying GSI QP to error during ocrdma_close is fixed. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 6 ++---- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 4 +--- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 20 ++++++++------------ drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 16 ++++++++-------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 21 +++------------------ 7 files changed, 24 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 3042c87a74ba..19011dbb930f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -35,6 +35,7 @@ #include #include +#include #include #include "ocrdma_sli.h" @@ -234,7 +235,7 @@ struct ocrdma_dev { struct list_head entry; struct rcu_head rcu; int id; - struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG]; + u64 stag_arr[OCRDMA_MAX_STAG]; u16 pvid; u32 asic_id; @@ -287,7 +288,6 @@ struct ocrdma_cq { struct ocrdma_pd { struct ib_pd ibpd; - struct ocrdma_dev *dev; struct ocrdma_ucontext *uctx; u32 id; int num_dpp_qp; @@ -372,10 +372,8 @@ struct ocrdma_qp { bool dpp_enabled; u8 *ird_q_va; bool signaled; - u16 db_cache; }; - struct ocrdma_ucontext { struct ib_ucontext ibucontext; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index 5a82ce535181..1554cca5712a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -108,9 +108,7 @@ struct ocrdma_create_qp_uresp { u32 db_sq_offset; u32 db_rq_offset; u32 db_shift; - u64 rsvd1; - u64 rsvd2; - u64 rsvd3; + u64 rsvd[11]; } __packed; struct ocrdma_create_srq_uresp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 34071143006e..d4cc01f10c01 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -100,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); - ah = kzalloc(sizeof *ah, GFP_ATOMIC); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 5d3485846d87..3bbf2010a821 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -32,7 +32,6 @@ #include #include -#include #include "ocrdma.h" #include "ocrdma_hw.h" @@ -386,8 +385,8 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt, } } -static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q, - int queue_type) +static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, + struct ocrdma_queue_info *q, int queue_type) { u8 opcode = 0; int status; @@ -778,7 +777,6 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, } } - static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) { /* async CQE processing */ @@ -825,8 +823,6 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) ocrdma_process_acqe(dev, cqe); else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK) ocrdma_process_mcqe(dev, cqe); - else - pr_err("%s() cqe->compl is not set.\n", __func__); memset(cqe, 0, sizeof(struct ocrdma_mcqe)); ocrdma_mcq_inc_tail(dev); } @@ -1050,6 +1046,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_qp = (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT; + attr->max_srq = + (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; attr->max_send_sge = ((rsp->max_write_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); @@ -1065,9 +1064,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; - attr->max_srq = - (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT; @@ -1411,7 +1407,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size, static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) { - int i ; + int i; int status = 0; int max_ah; struct ocrdma_create_ah_tbl *cmd; @@ -2279,7 +2275,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); status = ocrdma_query_gid(&qp->dev->ibdev, 1, - ah_attr->grh.sgid_index, &sgid); + ah_attr->grh.sgid_index, &sgid); if (status) return status; @@ -2666,7 +2662,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev) for (i = 0; i < num_eq; i++) { status = ocrdma_create_eq(dev, &dev->eq_tbl[i], - OCRDMA_EQ_LEN); + OCRDMA_EQ_LEN); if (status) { status = -EINVAL; break; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 61248b99f98f..8bd9db689b2f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -488,7 +488,7 @@ static int ocrdma_close(struct ocrdma_dev *dev) cur_qp = dev->qp_tbl; for (i = 0; i < OCRDMA_MAX_QP; i++) { qp = cur_qp[i]; - if (qp) { + if (qp && qp->ibqp.qp_type != IB_QPT_GSI) { /* change the QP state to ERROR */ _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6e048b7283c4..96c9ee602ba4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -152,11 +152,10 @@ enum { #define OCRDMA_MIN_Q_PAGE_SIZE (4096) #define OCRDMA_MAX_Q_PAGES (8) -#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C -#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF -#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 -#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 - +#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C +#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF +#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 +#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 /* # 0: 4K Bytes # 1: 8K Bytes @@ -633,7 +632,7 @@ enum { enum { OCRDMA_CREATE_CQ_VER2 = 2, - OCRDMA_CREATE_CQ_VER3 = 3, + OCRDMA_CREATE_CQ_VER3 = 3, OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, @@ -1093,6 +1092,7 @@ enum { OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF << OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT }; + struct ocrdma_modify_qp_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; @@ -1105,8 +1105,8 @@ struct ocrdma_query_qp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr req; -#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 -#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF +#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 +#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF u32 qp_id; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 95eeeeb1251b..6d3990f28772 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -53,7 +53,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, dev = get_ocrdma_dev(ibdev); memset(sgid, 0, sizeof(*sgid)); - if (index >= OCRDMA_MAX_SGID) + if (index > OCRDMA_MAX_SGID) return -EINVAL; memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); @@ -144,7 +144,6 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev, } } - int ocrdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { @@ -1210,7 +1209,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; } - static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, struct ib_qp_init_attr *attrs) { @@ -1296,17 +1294,6 @@ gen_err: return ERR_PTR(status); } - -static void ocrdma_flush_rq_db(struct ocrdma_qp *qp) -{ - if (qp->db_cache) { - u32 val = qp->rq.dbid | (qp->db_cache << - OCRDMA_DB_RQ_SHIFT); - iowrite32(val, qp->rq_db); - qp->db_cache = 0; - } -} - int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) { @@ -1325,8 +1312,6 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (status < 0) return status; status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); - if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR) - ocrdma_flush_rq_db(qp); return status; } @@ -2042,7 +2027,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, fast_reg->num_sges = wr->wr.fast_reg.page_list_len; fast_reg->size_sge = get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) & + mr = (struct ocrdma_mr *)qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); return 0; @@ -2877,7 +2862,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) goto mbx_err; mr->ibmr.rkey = mr->hwmr.lkey; mr->ibmr.lkey = mr->hwmr.lkey; - dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (u64)mr; return &mr->ibmr; mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); -- cgit v1.2.3 From 7a1e89d8b793feda96754fd3ba5079c4ba3862ee Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 17 Mar 2014 23:14:17 -0700 Subject: RDMA/ocrdma: Fix warnings about pointer <-> integer casts We should cast pointers to and from unsigned long to turn them into ints. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 6d3990f28772..dd554a2ffe56 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2027,8 +2027,8 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, fast_reg->num_sges = wr->wr.fast_reg.page_list_len; fast_reg->size_sge = get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *)qp->dev->stag_arr[(hdr->lkey >> 8) & - (OCRDMA_MAX_STAG - 1)]; + mr = (struct ocrdma_mr *) (unsigned long) + qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); return 0; } @@ -2862,7 +2862,8 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) goto mbx_err; mr->ibmr.rkey = mr->hwmr.lkey; mr->ibmr.lkey = mr->hwmr.lkey; - dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (u64)mr; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = + (unsigned long) mr; return &mr->ibmr; mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); -- cgit v1.2.3 From 2d8f57d56f5850a75d6225519e77f75ad9949f25 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 18 Mar 2014 14:54:56 +0530 Subject: RDMA/ocrdma: Unregister inet notifier when unloading ocrdma Unregister the inet notifier during ocrdma unload to avoid a panic after driver unload. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 8bd9db689b2f..7c504e079744 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -539,6 +539,11 @@ static void ocrdma_unregister_inet6addr_notifier(void) #endif } +static void ocrdma_unregister_inetaddr_notifier(void) +{ + unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier); +} + static int __init ocrdma_init_module(void) { int status; @@ -552,13 +557,19 @@ static int __init ocrdma_init_module(void) #if IS_ENABLED(CONFIG_IPV6) status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier); if (status) - return status; + goto err_notifier6; #endif status = be_roce_register_driver(&ocrdma_drv); if (status) - ocrdma_unregister_inet6addr_notifier(); + goto err_be_reg; + return 0; + +err_be_reg: + ocrdma_unregister_inet6addr_notifier(); +err_notifier6: + ocrdma_unregister_inetaddr_notifier(); return status; } @@ -566,6 +577,7 @@ static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); ocrdma_unregister_inet6addr_notifier(); + ocrdma_unregister_inetaddr_notifier(); ocrdma_rem_debugfs(); } -- cgit v1.2.3