From 185b9826782a53529b2b57328a8f49b1d0cf8f8f Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 15 Feb 2022 19:55:29 +0200 Subject: RDMA/mlx5: Remove redundant work in struct mlx5_cache_ent delayed_cache_work_func() and the cache_work_func() are both wrappers of __cache_work_func(). Instead of having a special not delayed work, use the delayed work with delay = 0. Link: https://lore.kernel.org/r/18b6ae205e75f087aa4a2a05c81ea8b66d8d88dc.1644947594.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 157d862fb864..cd14d1b9dc1d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -465,14 +465,14 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) return; if (ent->available_mrs < ent->limit) { ent->fill_to_high_water = true; - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } else if (ent->fill_to_high_water && ent->available_mrs + ent->pending < 2 * ent->limit) { /* * Once we start populating due to hitting a low water mark * continue until we pass the high water mark. */ - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } else if (ent->available_mrs == 2 * ent->limit) { ent->fill_to_high_water = false; } else if (ent->available_mrs > 2 * ent->limit) { @@ -482,7 +482,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) queue_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(1000)); else - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } } @@ -558,14 +558,6 @@ static void delayed_cache_work_func(struct work_struct *work) __cache_work_func(ent); } -static void cache_work_func(struct work_struct *work) -{ - struct mlx5_cache_ent *ent; - - ent = container_of(work, struct mlx5_cache_ent, work); - __cache_work_func(ent); -} - /* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, unsigned int entry, int access_flags) @@ -726,7 +718,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->dev = dev; ent->limit = 0; - INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); if (i > MR_CACHE_LAST_STD_ENTRY) { @@ -770,7 +761,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) spin_lock_irq(&ent->lock); ent->disabled = true; spin_unlock_irq(&ent->lock); - cancel_work_sync(&ent->work); cancel_delayed_work_sync(&ent->dwork); } -- cgit v1.2.3 From 2f0e60d5e9f96341a0c8a01be8878cdb3b29ff20 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 15 Feb 2022 19:55:30 +0200 Subject: RDMA/mlx5: Fix the flow of a miss in the allocation of a cache ODP MR When an ODP MR cache entry is empty and trying to allocate it, increment the ent->miss counter and call to queue_adjust_cache_locked() to verify the entry is balanced. Fixes: aad719dcf379 ("RDMA/mlx5: Allow MRs to be created in the cache synchronously") Link: https://lore.kernel.org/r/09503e295276dcacc92cb1d8aef1ad0961c99dc1.1644947594.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index cd14d1b9dc1d..bce3cb6af524 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -577,6 +577,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { + queue_adjust_cache_locked(ent); + ent->miss++; spin_unlock_irq(&ent->lock); mr = create_cache_mr(ent); if (IS_ERR(mr)) -- cgit v1.2.3 From 56561ac6b27d489feb5d1e7e8b2a55a15063fcad Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 15 Feb 2022 19:55:31 +0200 Subject: RDMA/mlx5: Merge similar flows of allocating MR from the cache When allocating a MR from the cache, the driver calls to get_cache_mr(), and in case of failure, retries with create_cache_mr(). This is the flow of mlx5_mr_cache_alloc(), so use it instead. Link: https://lore.kernel.org/r/53c85fcd4de6ec9de0b8e6cbb1bf5d5fe19900c3.1644947594.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 47 +++++------------------------------------ 1 file changed, 5 insertions(+), 42 deletions(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bce3cb6af524..0c1dc13b4c45 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -558,23 +558,16 @@ static void delayed_cache_work_func(struct work_struct *work) __cache_work_func(ent); } -/* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry, int access_flags) + struct mlx5_cache_ent *ent, + int access_flags) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || - entry >= ARRAY_SIZE(cache->ent))) - return ERR_PTR(-EINVAL); - /* Matches access in alloc_cache_mr() */ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); - ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { queue_adjust_cache_locked(ent); @@ -592,32 +585,9 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, mlx5_clear_mr(mr); } - mr->access_flags = access_flags; return mr; } -/* Return a MR already available in the cache */ -static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) -{ - struct mlx5_ib_mr *mr = NULL; - struct mlx5_cache_ent *ent = req_ent; - - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; - } - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - req_ent->miss++; - return NULL; -} - static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; @@ -951,16 +921,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } - mr = get_cache_mr(ent); - if (!mr) { - mr = create_cache_mr(ent); - /* - * The above already tried to do the same stuff as reg_create(), - * no reason to try it again. - */ - if (IS_ERR(mr)) - return mr; - } + mr = mlx5_mr_cache_alloc(dev, ent, access_flags); + if (IS_ERR(mr)) + return mr; mr->ibmr.pd = pd; mr->umem = umem; -- cgit v1.2.3 From 9ee2516c43823652da597633aed9646dac51c1f8 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 15 Feb 2022 19:55:32 +0200 Subject: RDMA/mlx5: Store ndescs instead of the translation table size Currently, ent->xlt stores the translation table size. This data should not be stored in the cache entry but be written directly to the mailbox. Store ndescs instead, and deduce the translation table size from it according to the access mode. Link: https://lore.kernel.org/r/e9dbfaa1f279793a6bd28ee5a31cb4f0f0d70f05.1644947594.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 0c1dc13b4c45..eb14ea4bcbba 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -176,6 +176,25 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) spin_unlock_irqrestore(&ent->lock, flags); } +static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) +{ + int ret = 0; + + switch (access_mode) { + case MLX5_MKC_ACCESS_MODE_MTT: + ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_mtt)); + break; + case MLX5_MKC_ACCESS_MODE_KSM: + ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_klm)); + break; + default: + WARN_ON(1); + } + return ret; +} + static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) { struct mlx5_ib_mr *mr; @@ -191,7 +210,8 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, translations_octword_size, + get_mkc_octo_size(ent->access_mode, ent->ndescs)); MLX5_SET(mkc, mkc, log_page_size, ent->page); return mr; } @@ -701,8 +721,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) continue; ent->page = PAGE_SHIFT; - ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / - MLX5_IB_UMR_OCTOWORD; + ent->ndescs = 1 << ent->order; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && -- cgit v1.2.3 From 77528e2aed9246cf8017b8a6f1b658a264d6f2b2 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 15 Feb 2022 19:55:33 +0200 Subject: RDMA/mlx5: Reorder calls to pcie_relaxed_ordering_enabled() The mkc is the key for the mkey cache, hence, created in each attempt to get a cache mkey, while pcie_relaxed_ordering_enabled() is called during the setting of the mkc, but used only for cases where IB_ACCESS_RELAXED_ORDERING is set. pcie_relaxed_ordering_enabled() is an expensive call (26 us). Reorder the code so the driver will call it only when it is needed. Link: https://lore.kernel.org/r/684be1366cb1d4f05aa3e78986205e4bc410443a.1644947594.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw/mlx5/mr.c') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index eb14ea4bcbba..eab7921eb91f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -68,7 +68,6 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ro_pci_enabled = pcie_relaxed_ordering_enabled(dev->mdev->pdev); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); @@ -76,12 +75,13 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); + if ((acc & IB_ACCESS_RELAXED_ORDERING) && + pcie_relaxed_ordering_enabled(dev->mdev->pdev)) { + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); + } MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); -- cgit v1.2.3