From 24bce5080306bd5255cbda3d6b09a29d5515b470 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Thu, 21 Jun 2007 11:05:58 -0700
Subject: IB/umem: Fix possible hang on process exit

If ib_umem_release() is called after ib_uverbs_close() sets context->closing,
then a process can get stuck in a D state, because the code boils down to

	if (down_write_trylock(&mm->mmap_sem))
		down_write(&mm->mmap_sem);

which is obviously a stupid instant deadlock.  Fix the code so that we
only try to take the lock once.

This bug was introduced in commit f7c6a7b5 ("IB/uverbs: Export
ib_umem_get()/ib_umem_release() to modules") which fortunately never
made it into a release, and was reported by Pete Wyckoff <pw@osc.edu>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/core/umem.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b4aec5103c99..d40652a80151 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -225,13 +225,15 @@ void ib_umem_release(struct ib_umem *umem)
 	 * up here and not be able to take the mmap_sem.  In that case
 	 * we defer the vm_locked accounting to the system workqueue.
 	 */
-	if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
-		INIT_WORK(&umem->work, ib_umem_account);
-		umem->mm   = mm;
-		umem->diff = diff;
-
-		schedule_work(&umem->work);
-		return;
+	if (context->closing) {
+		if (!down_write_trylock(&mm->mmap_sem)) {
+			INIT_WORK(&umem->work, ib_umem_account);
+			umem->mm   = mm;
+			umem->diff = diff;
+
+			schedule_work(&umem->work);
+			return;
+		}
 	} else
 		down_write(&mm->mmap_sem);
 
-- 
cgit v1.2.3


From 3ec7393a6858a1716e74aa81be6af76fd180021d Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@dev.mellanox.co.il>
Date: Tue, 19 Jun 2007 13:40:41 +0300
Subject: IPoIB/cm: Initialize RX before moving QP to RTR

Fix a crasher bug in IPoIB CM: once a QP is in the RTR state, a
receive completion (or even an asynchronous error) might be observed
on this QP, so we have to initialize all of our receive data
structures before moving to the RTR state.

As an optimization (since modify_qp might take a long time), the
jiffies update done when moving RX to the passive_ids list is also
left in place to reduce the chance of the RX being misdetected as
stale.

This fixes bug <https://bugs.openfabrics.org/show_bug.cgi?id=662>.

Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 076a0bbb63d7..c64249f7caed 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -309,6 +309,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 		return -ENOMEM;
 	p->dev = dev;
 	p->id = cm_id;
+	cm_id->context = p;
+	p->state = IPOIB_CM_RX_LIVE;
+	p->jiffies = jiffies;
+	INIT_LIST_HEAD(&p->list);
+
 	p->qp = ipoib_cm_create_rx_qp(dev, p);
 	if (IS_ERR(p->qp)) {
 		ret = PTR_ERR(p->qp);
@@ -320,24 +325,24 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 	if (ret)
 		goto err_modify;
 
+	spin_lock_irq(&priv->lock);
+	queue_delayed_work(ipoib_workqueue,
+			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+	/* Add this entry to passive ids list head, but do not re-add it
+	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
+	p->jiffies = jiffies;
+	if (p->state == IPOIB_CM_RX_LIVE)
+		list_move(&p->list, &priv->cm.passive_ids);
+	spin_unlock_irq(&priv->lock);
+
 	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
 	if (ret) {
 		ipoib_warn(priv, "failed to send REP: %d\n", ret);
-		goto err_rep;
+		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
+			ipoib_warn(priv, "unable to move qp to error state\n");
 	}
-
-	cm_id->context = p;
-	p->jiffies = jiffies;
-	p->state = IPOIB_CM_RX_LIVE;
-	spin_lock_irq(&priv->lock);
-	if (list_empty(&priv->cm.passive_ids))
-		queue_delayed_work(ipoib_workqueue,
-				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
-	list_add(&p->list, &priv->cm.passive_ids);
-	spin_unlock_irq(&priv->lock);
 	return 0;
 
-err_rep:
 err_modify:
 	ib_destroy_qp(p->qp);
 err_qp:
-- 
cgit v1.2.3


From 82c3aca6ad9004169df8f2f8c0747686fe4003b3 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@dev.mellanox.co.il>
Date: Wed, 20 Jun 2007 19:22:15 +0300
Subject: IPoIB/cm: Fix interoperability when MTU doesn't match

IPoIB connected mode currently rejects a connection request unless the
supported MTU is >= the local netdevice MTU. This breaks
interoperability with implementations that might have tweaked
IPOIB_CM_MTU, and there's real no longer a reason to do so: this test
is just a leftover from when we did not tweak MTU per-connection.  Fix
this by making the test as permissive as possible.

Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c64249f7caed..472c632ee52c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -759,9 +759,9 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 
 	p->mtu = be32_to_cpu(data->mtu);
 
-	if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) {
-		ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n",
-			   p->mtu, priv->dev->mtu);
+	if (p->mtu <= IPOIB_ENCAP_LEN) {
+		ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n",
+			   p->mtu, IPOIB_ENCAP_LEN);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 13ef5f44c3931dff1d75443a875e97b588d4b8f0 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Thu, 21 Jun 2007 13:39:08 -0700
Subject: IPoIB/cm: Remove dead definition of struct ipoib_cm_id

It's completely unused.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 472c632ee52c..5ffc464c99aa 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -56,13 +56,6 @@ MODULE_PARM_DESC(cm_data_debug_level,
 #define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
 #define IPOIB_CM_RX_UPDATE_MASK (0x3)
 
-struct ipoib_cm_id {
-	struct ib_cm_id *id;
-	int flags;
-	u32 remote_qpn;
-	u32 remote_mtu;
-};
-
 static struct ib_qp_attr ipoib_cm_err_attr = {
 	.qp_state = IB_QPS_ERR
 };
-- 
cgit v1.2.3


From c8681f14013d3ad2fc4fb4e30cfd4ea548f7a249 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 21 Jun 2007 13:39:10 -0700
Subject: IB/mlx4: Correct max_srq_wr returned from mlx4_ib_query_device()

We need to keep a spare entry in the SRQ so that there always is a
next WQE available when posting receives (so that we can tell the
difference between a full queue and an empty queue).  So subtract 1
from the value HW gives us before reporting the limit on SRQ entries
to consumers.

Found by Mellanox QA.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1095c82b38c2..c591616dccde 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -120,7 +120,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
 	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
 	props->max_srq		   = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
-	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes;
+	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
 	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
-- 
cgit v1.2.3