From 24bce5080306bd5255cbda3d6b09a29d5515b470 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 21 Jun 2007 11:05:58 -0700 Subject: IB/umem: Fix possible hang on process exit If ib_umem_release() is called after ib_uverbs_close() sets context->closing, then a process can get stuck in a D state, because the code boils down to if (down_write_trylock(&mm->mmap_sem)) down_write(&mm->mmap_sem); which is obviously a stupid instant deadlock. Fix the code so that we only try to take the lock once. This bug was introduced in commit f7c6a7b5 ("IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules") which fortunately never made it into a release, and was reported by Pete Wyckoff . Signed-off-by: Roland Dreier --- drivers/infiniband/core/umem.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index b4aec5103c99..d40652a80151 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -225,13 +225,15 @@ void ib_umem_release(struct ib_umem *umem) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ - if (context->closing && !down_write_trylock(&mm->mmap_sem)) { - INIT_WORK(&umem->work, ib_umem_account); - umem->mm = mm; - umem->diff = diff; - - schedule_work(&umem->work); - return; + if (context->closing) { + if (!down_write_trylock(&mm->mmap_sem)) { + INIT_WORK(&umem->work, ib_umem_account); + umem->mm = mm; + umem->diff = diff; + + schedule_work(&umem->work); + return; + } } else down_write(&mm->mmap_sem); -- cgit v1.2.3 From 3ec7393a6858a1716e74aa81be6af76fd180021d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 19 Jun 2007 13:40:41 +0300 Subject: IPoIB/cm: Initialize RX before moving QP to RTR Fix a crasher bug in IPoIB CM: once a QP is in the RTR state, a receive completion (or even an asynchronous error) might be observed on this QP, so we have to initialize all of our receive data structures before moving to the RTR state. As an optimization (since modify_qp might take a long time), the jiffies update done when moving RX to the passive_ids list is also left in place to reduce the chance of the RX being misdetected as stale. This fixes bug . Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 076a0bbb63d7..c64249f7caed 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -309,6 +309,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even return -ENOMEM; p->dev = dev; p->id = cm_id; + cm_id->context = p; + p->state = IPOIB_CM_RX_LIVE; + p->jiffies = jiffies; + INIT_LIST_HEAD(&p->list); + p->qp = ipoib_cm_create_rx_qp(dev, p); if (IS_ERR(p->qp)) { ret = PTR_ERR(p->qp); @@ -320,24 +325,24 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even if (ret) goto err_modify; + spin_lock_irq(&priv->lock); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + /* Add this entry to passive ids list head, but do not re-add it + * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */ + p->jiffies = jiffies; + if (p->state == IPOIB_CM_RX_LIVE) + list_move(&p->list, &priv->cm.passive_ids); + spin_unlock_irq(&priv->lock); + ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn); if (ret) { ipoib_warn(priv, "failed to send REP: %d\n", ret); - goto err_rep; + if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) + ipoib_warn(priv, "unable to move qp to error state\n"); } - - cm_id->context = p; - p->jiffies = jiffies; - p->state = IPOIB_CM_RX_LIVE; - spin_lock_irq(&priv->lock); - if (list_empty(&priv->cm.passive_ids)) - queue_delayed_work(ipoib_workqueue, - &priv->cm.stale_task, IPOIB_CM_RX_DELAY); - list_add(&p->list, &priv->cm.passive_ids); - spin_unlock_irq(&priv->lock); return 0; -err_rep: err_modify: ib_destroy_qp(p->qp); err_qp: -- cgit v1.2.3 From 82c3aca6ad9004169df8f2f8c0747686fe4003b3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 20 Jun 2007 19:22:15 +0300 Subject: IPoIB/cm: Fix interoperability when MTU doesn't match IPoIB connected mode currently rejects a connection request unless the supported MTU is >= the local netdevice MTU. This breaks interoperability with implementations that might have tweaked IPOIB_CM_MTU, and there's real no longer a reason to do so: this test is just a leftover from when we did not tweak MTU per-connection. Fix this by making the test as permissive as possible. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index c64249f7caed..472c632ee52c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -759,9 +759,9 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even p->mtu = be32_to_cpu(data->mtu); - if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) { - ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n", - p->mtu, priv->dev->mtu); + if (p->mtu <= IPOIB_ENCAP_LEN) { + ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n", + p->mtu, IPOIB_ENCAP_LEN); return -EINVAL; } -- cgit v1.2.3 From 13ef5f44c3931dff1d75443a875e97b588d4b8f0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 21 Jun 2007 13:39:08 -0700 Subject: IPoIB/cm: Remove dead definition of struct ipoib_cm_id It's completely unused. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 472c632ee52c..5ffc464c99aa 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -56,13 +56,6 @@ MODULE_PARM_DESC(cm_data_debug_level, #define IPOIB_CM_RX_DELAY (3 * 256 * HZ) #define IPOIB_CM_RX_UPDATE_MASK (0x3) -struct ipoib_cm_id { - struct ib_cm_id *id; - int flags; - u32 remote_qpn; - u32 remote_mtu; -}; - static struct ib_qp_attr ipoib_cm_err_attr = { .qp_state = IB_QPS_ERR }; -- cgit v1.2.3 From c8681f14013d3ad2fc4fb4e30cfd4ea548f7a249 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 21 Jun 2007 13:39:10 -0700 Subject: IB/mlx4: Correct max_srq_wr returned from mlx4_ib_query_device() We need to keep a spare entry in the SRQ so that there always is a next WQE available when posting receives (so that we can tell the difference between a full queue and an empty queue). So subtract 1 from the value HW gives us before reporting the limit on SRQ entries to consumers. Found by Mellanox QA. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1095c82b38c2..c591616dccde 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -120,7 +120,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; - props->max_srq_wr = dev->dev->caps.max_srq_wqes; + props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? -- cgit v1.2.3