aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/Makefile1
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/addr.c5
-rw-r--r--drivers/infiniband/core/cache.c1
-rw-r--r--drivers/infiniband/core/cma.c427
-rw-r--r--drivers/infiniband/core/device.c3
-rw-r--r--drivers/infiniband/core/fmr_pool.c4
-rw-r--r--drivers/infiniband/core/iwcm.c47
-rw-r--r--drivers/infiniband/core/mad.c11
-rw-r--r--drivers/infiniband/core/multicast.c837
-rw-r--r--drivers/infiniband/core/sa.h66
-rw-r--r--drivers/infiniband/core/sa_query.c30
-rw-r--r--drivers/infiniband/core/sysfs.c2
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/ucma.c206
-rw-r--r--drivers/infiniband/core/user_mad.c4
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2
-rw-r--r--drivers/infiniband/core/uverbs_main.c6
-rw-r--r--drivers/infiniband/hw/amso1100/c2.c2
-rw-r--r--drivers/infiniband/hw/amso1100/c2_cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/Kconfig27
-rw-r--r--drivers/infiniband/hw/cxgb3/Makefile12
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_dbg.c206
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c1279
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h200
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c330
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.h69
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h684
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c188
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h176
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c2080
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h222
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c224
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_ev.c230
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c171
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c1202
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h366
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c1008
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_user.h66
-rw-r--r--drivers/infiniband/hw/cxgb3/tcb.h632
-rw-r--r--drivers/infiniband/hw/ehca/Kconfig8
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h48
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c68
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c5
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c3
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c309
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h8
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c34
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c6
-rw-r--r--drivers/infiniband/hw/ehca/ehca_pd.c3
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c81
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_uverbs.c395
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.h11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_dma.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c14
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c40
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c129
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h9
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c110
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c14
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c7
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c9
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig16
-rw-r--r--drivers/infiniband/ulp/ipoib/Makefile1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h215
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c1237
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c29
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c96
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c199
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c40
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c13
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c4
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c7
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h1
91 files changed, 13331 insertions, 939 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 9edfacee7d84..66b36de9fa6f 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -38,6 +38,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"
source "drivers/infiniband/hw/ehca/Kconfig"
source "drivers/infiniband/hw/amso1100/Kconfig"
+source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index 2b5d1098ef45..da2066c4f22c 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 50fb1cd447b7..189e5d4b9b17 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
-ib_sa-y := sa_query.o
+ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index af939796750d..a91001c59b69 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -360,8 +360,7 @@ static int netevent_callback(struct notifier_block *self, unsigned long event,
if (event == NETEVENT_NEIGH_UPDATE) {
struct neighbour *neigh = ctx;
- if (neigh->dev->type == ARPHRD_INFINIBAND &&
- (neigh->nud_state & NUD_VALID)) {
+ if (neigh->nud_state & NUD_VALID) {
set_timeout(jiffies);
}
}
@@ -374,7 +373,7 @@ static struct notifier_block nb = {
static int addr_init(void)
{
- addr_wq = create_singlethread_workqueue("ib_addr_wq");
+ addr_wq = create_singlethread_workqueue("ib_addr");
if (!addr_wq)
return -ENOMEM;
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 98272fbbfb31..558c9a0fc8b9 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -38,7 +38,6 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
-#include <linux/sched.h> /* INIT_WORK, schedule_work(), flush_scheduled_work() */
#include <rdma/ib_cache.h>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9e0ab048c878..f8d69b3fa307 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -71,6 +71,8 @@ static struct workqueue_struct *cma_wq;
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
+static DEFINE_IDR(ipoib_ps);
+static int next_port;
struct cma_device {
struct list_head list;
@@ -115,6 +117,7 @@ struct rdma_id_private {
struct list_head list;
struct list_head listen_list;
struct cma_device *cma_dev;
+ struct list_head mc_list;
enum cma_state state;
spinlock_t lock;
@@ -133,10 +136,23 @@ struct rdma_id_private {
} cm_id;
u32 seq_num;
+ u32 qkey;
u32 qp_num;
u8 srq;
};
+struct cma_multicast {
+ struct rdma_id_private *id_priv;
+ union {
+ struct ib_sa_multicast *ib;
+ } multicast;
+ struct list_head list;
+ void *context;
+ struct sockaddr addr;
+ u8 pad[sizeof(struct sockaddr_in6) -
+ sizeof(struct sockaddr)];
+};
+
struct cma_work {
struct work_struct work;
struct rdma_id_private *id;
@@ -242,6 +258,11 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
}
+static inline int cma_is_ud_ps(enum rdma_port_space ps)
+{
+ return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
+}
+
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -264,19 +285,41 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
id_priv->cma_dev = NULL;
}
+static int cma_set_qkey(struct ib_device *device, u8 port_num,
+ enum rdma_port_space ps,
+ struct rdma_dev_addr *dev_addr, u32 *qkey)
+{
+ struct ib_sa_mcmember_rec rec;
+ int ret = 0;
+
+ switch (ps) {
+ case RDMA_PS_UDP:
+ *qkey = RDMA_UDP_QKEY;
+ break;
+ case RDMA_PS_IPOIB:
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
+ *qkey = be32_to_cpu(rec.qkey);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
- enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
union ib_gid gid;
int ret = -ENODEV;
- switch (rdma_node_get_transport(dev_type)) {
+ switch (rdma_node_get_transport(dev_addr->dev_type)) {
case RDMA_TRANSPORT_IB:
- ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ ib_addr_get_sgid(dev_addr, &gid);
break;
case RDMA_TRANSPORT_IWARP:
- iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+ iw_addr_get_sgid(dev_addr, &gid);
break;
default:
return -ENODEV;
@@ -286,7 +329,12 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
ret = ib_find_cached_gid(cma_dev->device, &gid,
&id_priv->id.port_num, NULL);
if (!ret) {
- cma_attach_to_dev(id_priv, cma_dev);
+ ret = cma_set_qkey(cma_dev->device,
+ id_priv->id.port_num,
+ id_priv->id.ps, dev_addr,
+ &id_priv->qkey);
+ if (!ret)
+ cma_attach_to_dev(id_priv, cma_dev);
break;
}
}
@@ -324,40 +372,50 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
init_waitqueue_head(&id_priv->wait_remove);
atomic_set(&id_priv->dev_remove, 0);
INIT_LIST_HEAD(&id_priv->listen_list);
+ INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);
-static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
struct ib_qp_attr qp_attr;
- struct rdma_dev_addr *dev_addr;
- int ret;
+ int qp_attr_mask, ret;
- dev_addr = &id_priv->id.route.addr.dev_addr;
- ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
- ib_addr_get_pkey(dev_addr),
- &qp_attr.pkey_index);
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
return ret;
- qp_attr.qp_state = IB_QPS_INIT;
- qp_attr.qp_access_flags = 0;
- qp_attr.port_num = id_priv->id.port_num;
- return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
- IB_QP_PKEY_INDEX | IB_QP_PORT);
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret)
+ return ret;
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+ if (ret)
+ return ret;
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ qp_attr.sq_psn = 0;
+ ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+
+ return ret;
}
-static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
qp_attr.qp_state = IB_QPS_INIT;
- qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ return ret;
- return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+ return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}
int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
@@ -375,18 +433,10 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
if (IS_ERR(qp))
return PTR_ERR(qp);
- switch (rdma_node_get_transport(id->device->node_type)) {
- case RDMA_TRANSPORT_IB:
- ret = cma_init_ib_qp(id_priv, qp);
- break;
- case RDMA_TRANSPORT_IWARP:
- ret = cma_init_iw_qp(id_priv, qp);
- break;
- default:
- ret = -ENOSYS;
- break;
- }
-
+ if (cma_is_ud_ps(id_priv->id.ps))
+ ret = cma_init_ud_qp(id_priv, qp);
+ else
+ ret = cma_init_conn_qp(id_priv, qp);
if (ret)
goto err;
@@ -459,23 +509,55 @@ static int cma_modify_qp_err(struct rdma_cm_id *id)
return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
}
+static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ int ret;
+
+ ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
+ ib_addr_get_pkey(dev_addr),
+ &qp_attr->pkey_index);
+ if (ret)
+ return ret;
+
+ qp_attr->port_num = id_priv->id.port_num;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+ if (cma_is_ud_ps(id_priv->id.ps)) {
+ qp_attr->qkey = id_priv->qkey;
+ *qp_attr_mask |= IB_QP_QKEY;
+ } else {
+ qp_attr->qp_access_flags = 0;
+ *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
+ }
+ return 0;
+}
+
int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
int *qp_attr_mask)
{
struct rdma_id_private *id_priv;
- int ret;
+ int ret = 0;
id_priv = container_of(id, struct rdma_id_private, id);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
- ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
- qp_attr_mask);
+ if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
+ ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
+ else
+ ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
+ qp_attr_mask);
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
break;
case RDMA_TRANSPORT_IWARP:
- ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
- qp_attr_mask);
+ if (!id_priv->cm_id.iw) {
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+ } else
+ ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+ qp_attr_mask);
break;
default:
ret = -ENOSYS;
@@ -697,6 +779,19 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+ struct cma_multicast *mc;
+
+ while (!list_empty(&id_priv->mc_list)) {
+ mc = container_of(id_priv->mc_list.next,
+ struct cma_multicast, list);
+ list_del(&mc->list);
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ }
+}
+
void rdma_destroy_id(struct rdma_cm_id *id)
{
struct rdma_id_private *id_priv;
@@ -721,6 +816,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
default:
break;
}
+ cma_leave_mc_groups(id_priv);
mutex_lock(&lock);
cma_detach_from_dev(id_priv);
}
@@ -971,7 +1067,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id->id.ps);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
- if (listen_id->id.ps == RDMA_PS_UDP) {
+ if (cma_is_ud_ps(listen_id->id.ps)) {
conn_id = cma_new_udp_id(&listen_id->id, ib_event);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
@@ -1722,33 +1818,74 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
unsigned short snum)
{
struct rdma_bind_list *bind_list;
- int port, start, ret;
+ int port, ret;
- bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+ bind_list = kmalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
- start = snum ? snum : sysctl_local_port_range[0];
+ do {
+ ret = idr_get_new_above(ps, bind_list, snum, &port);
+ } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+ if (ret)
+ goto err1;
+ if (port != snum) {
+ ret = -EADDRNOTAVAIL;
+ goto err2;
+ }
+
+ bind_list->ps = ps;
+ bind_list->port = (unsigned short) port;
+ cma_bind_port(bind_list, id_priv);
+ return 0;
+err2:
+ idr_remove(ps, port);
+err1:
+ kfree(bind_list);
+ return ret;
+}
+
+static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+ struct rdma_bind_list *bind_list;
+ int port, ret;
+
+ bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+ if (!bind_list)
+ return -ENOMEM;
+
+retry:
do {
- ret = idr_get_new_above(ps, bind_list, start, &port);
+ ret = idr_get_new_above(ps, bind_list, next_port, &port);
} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
if (ret)
- goto err;
+ goto err1;
- if ((snum && port != snum) ||
- (!snum && port > sysctl_local_port_range[1])) {
- idr_remove(ps, port);
+ if (port > sysctl_local_port_range[1]) {
+ if (next_port != sysctl_local_port_range[0]) {
+ idr_remove(ps, port);
+ next_port = sysctl_local_port_range[0];
+ goto retry;
+ }
ret = -EADDRNOTAVAIL;
- goto err;
+ goto err2;
}
+ if (port == sysctl_local_port_range[1])
+ next_port = sysctl_local_port_range[0];
+ else
+ next_port = port + 1;
+
bind_list->ps = ps;
bind_list->port = (unsigned short) port;
cma_bind_port(bind_list, id_priv);
return 0;
-err:
+err2:
+ idr_remove(ps, port);
+err1:
kfree(bind_list);
return ret;
}
@@ -1805,13 +1942,16 @@ static int cma_get_port(struct rdma_id_private *id_priv)
case RDMA_PS_UDP:
ps = &udp_ps;
break;
+ case RDMA_PS_IPOIB:
+ ps = &ipoib_ps;
+ break;
default:
return -EPROTONOSUPPORT;
}
mutex_lock(&lock);
if (cma_any_port(&id_priv->id.route.addr.src_addr))
- ret = cma_alloc_port(ps, id_priv, 0);
+ ret = cma_alloc_any_port(ps, id_priv);
else
ret = cma_use_port(ps, id_priv);
mutex_unlock(&lock);
@@ -1919,7 +2059,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
event.status = ib_event->param.sidr_rep_rcvd.status;
break;
}
- if (rep->qkey != RDMA_UD_QKEY) {
+ if (id_priv->qkey != rep->qkey) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = -EINVAL;
break;
@@ -2118,7 +2258,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id->ps == RDMA_PS_UDP)
+ if (cma_is_ud_ps(id->ps))
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
@@ -2214,7 +2354,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
rep.qp_num = id_priv->qp_num;
- rep.qkey = RDMA_UD_QKEY;
+ rep.qkey = id_priv->qkey;
}
rep.private_data = private_data;
rep.private_data_len = private_data_len;
@@ -2238,7 +2378,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id->ps == RDMA_PS_UDP)
+ if (cma_is_ud_ps(id->ps))
ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
conn_param->private_data,
conn_param->private_data_len);
@@ -2299,7 +2439,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (id->ps == RDMA_PS_UDP)
+ if (cma_is_ud_ps(id->ps))
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
private_data, private_data_len);
else
@@ -2350,6 +2490,178 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc = multicast->context;
+ struct rdma_cm_event event;
+ int ret;
+
+ id_priv = mc->id_priv;
+ atomic_inc(&id_priv->dev_remove);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ goto out;
+
+ if (!status && id_priv->id.qp)
+ status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+ multicast->rec.mlid);
+
+ memset(&event, 0, sizeof event);
+ event.status = status;
+ event.param.ud.private_data = mc->context;
+ if (!status) {
+ event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num, &multicast->rec,
+ &event.param.ud.ah_attr);
+ event.param.ud.qp_num = 0xFFFFFF;
+ event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ } else
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+ ret = id_priv->id.event_handler(&id_priv->id, &event);
+ if (ret) {
+ cma_exch(id_priv, CMA_DESTROYING);
+ cma_release_remove(id_priv);
+ rdma_destroy_id(&id_priv->id);
+ return 0;
+ }
+out:
+ cma_release_remove(id_priv);
+ return 0;
+}
+
+static void cma_set_mgid(struct rdma_id_private *id_priv,
+ struct sockaddr *addr, union ib_gid *mgid)
+{
+ unsigned char mc_map[MAX_ADDR_LEN];
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *) addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
+
+ if (cma_any_addr(addr)) {
+ memset(mgid, 0, sizeof *mgid);
+ } else if ((addr->sa_family == AF_INET6) &&
+ ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+ 0xFF10A01B)) {
+ /* IPv6 address is an SA assigned MGID. */
+ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+ } else {
+ ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ mc_map[7] = 0x01; /* Use RDMA CM signature */
+ mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
+ mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
+ *mgid = *(union ib_gid *) (mc_map + 4);
+ }
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct ib_sa_mcmember_rec rec;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ ib_sa_comp_mask comp_mask;
+ int ret;
+
+ ib_addr_get_mgid(dev_addr, &rec.mgid);
+ ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+ &rec.mgid, &rec);
+ if (ret)
+ return ret;
+
+ cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
+ if (id_priv->id.ps == RDMA_PS_UDP)
+ rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ ib_addr_get_sgid(dev_addr, &rec.port_gid);
+ rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+ rec.join_state = 1;
+
+ comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+ IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+ mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec,
+ comp_mask, GFP_KERNEL,
+ cma_ib_mc_handler, mc);
+ if (IS_ERR(mc->multicast.ib))
+ return PTR_ERR(mc->multicast.ib);
+
+ return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+ void *context)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+ !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+ return -EINVAL;
+
+ mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ if (!mc)
+ return -ENOMEM;
+
+ memcpy(&mc->addr, addr, ip_addr_size(addr));
+ mc->context = context;
+ mc->id_priv = id_priv;
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ switch (rdma_node_get_transport(id->device->node_type)) {
+ case RDMA_TRANSPORT_IB:
+ ret = cma_join_ib_multicast(id_priv, mc);
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ if (ret) {
+ spin_lock_irq(&id_priv->lock);
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+ kfree(mc);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv;
+ struct cma_multicast *mc;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ spin_lock_irq(&id_priv->lock);
+ list_for_each_entry(mc, &id_priv->mc_list, list) {
+ if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ if (id->qp)
+ ib_detach_mcast(id->qp,
+ &mc->multicast.ib->rec.mgid,
+ mc->multicast.ib->rec.mlid);
+ ib_sa_free_multicast(mc->multicast.ib);
+ kfree(mc);
+ return;
+ }
+ }
+ spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
@@ -2448,7 +2760,11 @@ static int cma_init(void)
{
int ret;
- cma_wq = create_singlethread_workqueue("rdma_cm_wq");
+ get_random_bytes(&next_port, sizeof next_port);
+ next_port = (next_port % (sysctl_local_port_range[1] -
+ sysctl_local_port_range[0])) +
+ sysctl_local_port_range[0];
+ cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
return -ENOMEM;
@@ -2476,6 +2792,7 @@ static void cma_cleanup(void)
idr_destroy(&sdp_ps);
idr_destroy(&tcp_ps);
idr_destroy(&udp_ps);
+ idr_destroy(&ipoib_ps);
}
module_init(cma_init);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 63d2a39fb82c..7fabb425b033 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -36,6 +36,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mutex.h>
@@ -93,7 +94,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
};
int i;
- for (i = 0; i < sizeof mandatory_table / sizeof mandatory_table[0]; ++i) {
+ for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
device->name, mandatory_table[i].name);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 8926a2bd4a87..1d796e7c8199 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -301,7 +301,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
{
struct ib_pool_fmr *fmr;
- struct ib_fmr_attr attr = {
+ struct ib_fmr_attr fmr_attr = {
.max_pages = params->max_pages_per_fmr,
.max_maps = pool->max_remaps,
.page_shift = params->page_shift
@@ -321,7 +321,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
fmr->ref_count = 0;
INIT_HLIST_NODE(&fmr->cache_node);
- fmr->fmr = ib_alloc_fmr(pd, params->access, &attr);
+ fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
if (IS_ERR(fmr->fmr)) {
printk(KERN_WARNING "fmr_create failed for FMR %d", i);
kfree(fmr);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 1039ad57d53b..891d1fa7b2eb 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -146,6 +146,12 @@ static int copy_private_data(struct iw_cm_event *event)
return 0;
}
+static void free_cm_id(struct iwcm_id_private *cm_id_priv)
+{
+ dealloc_work_entries(cm_id_priv);
+ kfree(cm_id_priv);
+}
+
/*
* Release a reference on cm_id. If the last reference is being
* released, enable the waiting thread (in iw_destroy_cm_id) to
@@ -153,21 +159,14 @@ static int copy_private_data(struct iw_cm_event *event)
*/
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
- int ret = 0;
-
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
if (atomic_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
- if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
- BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
- BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
- &cm_id_priv->flags));
- ret = 1;
- }
complete(&cm_id_priv->destroy_comp);
+ return 1;
}
- return ret;
+ return 0;
}
static void add_ref(struct iw_cm_id *cm_id)
@@ -181,7 +180,11 @@ static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- iwcm_deref_id(cm_id_priv);
+ if (iwcm_deref_id(cm_id_priv) &&
+ test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+ BUG_ON(!list_empty(&cm_id_priv->work_list));
+ free_cm_id(cm_id_priv);
+ }
}
static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@@ -355,7 +358,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
case IW_CM_STATE_CONN_RECV:
/*
* App called destroy before/without calling accept after
- * receiving connection request event notification.
+ * receiving connection request event notification or
+ * returned non zero from the event callback function.
+ * In either case, must tell the provider to reject.
*/
cm_id_priv->state = IW_CM_STATE_DESTROYING;
break;
@@ -391,9 +396,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
wait_for_completion(&cm_id_priv->destroy_comp);
- dealloc_work_entries(cm_id_priv);
-
- kfree(cm_id_priv);
+ free_cm_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
/* Call the client CM handler */
ret = cm_id->cm_handler(cm_id, iw_event);
if (ret) {
+ iw_cm_reject(cm_id, NULL, 0);
set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
destroy_cm_id(cm_id);
if (atomic_read(&cm_id_priv->refcount)==0)
- kfree(cm_id);
+ free_cm_id(cm_id_priv);
}
out:
@@ -854,13 +858,12 @@ static void cm_work_handler(struct work_struct *_work)
destroy_cm_id(&cm_id_priv->id);
}
BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- if (iwcm_deref_id(cm_id_priv))
- return;
-
- if (atomic_read(&cm_id_priv->refcount)==0 &&
- test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
- dealloc_work_entries(cm_id_priv);
- kfree(cm_id_priv);
+ if (iwcm_deref_id(cm_id_priv)) {
+ if (test_bit(IWCM_F_CALLBACK_DESTROY,
+ &cm_id_priv->flags)) {
+ BUG_ON(!list_empty(&cm_id_priv->work_list));
+ free_cm_id(cm_id_priv);
+ }
return;
}
spin_lock_irqsave(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 5ed141ebd1c8..13efd4170349 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -642,7 +642,8 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
}
-static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
+static void build_smp_wc(struct ib_qp *qp,
+ u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
@@ -652,7 +653,7 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
wc->pkey_index = pkey_index;
wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
wc->src_qp = IB_QP0;
- wc->qp_num = IB_QP0;
+ wc->qp = qp;
wc->slid = slid;
wc->sl = 0;
wc->dlid_path_bits = 0;
@@ -713,7 +714,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
goto out;
}
- build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid),
+ build_smp_wc(mad_agent_priv->agent.qp,
+ send_wr->wr_id, be16_to_cpu(smp->dr_slid),
send_wr->wr.ud.pkey_index,
send_wr->wr.ud.port_num, &mad_wc);
@@ -2355,7 +2357,8 @@ static void local_completions(struct work_struct *work)
* Defined behavior is to complete response
* before request
*/
- build_smp_wc((unsigned long) local->mad_send_wr,
+ build_smp_wc(recv_mad_agent->agent.qp,
+ (unsigned long) local->mad_send_wr,
be16_to_cpu(IB_LID_PERMISSIVE),
0, recv_mad_agent->agent.port_num, &wc);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
new file mode 100644
index 000000000000..4a579b3a1c90
--- /dev/null
+++ b/drivers/infiniband/core/multicast.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include <rdma/ib_cache.h>
+#include "sa.h"
+
+static void mcast_add_one(struct ib_device *device);
+static void mcast_remove_one(struct ib_device *device);
+
+static struct ib_client mcast_client = {
+ .name = "ib_multicast",
+ .add = mcast_add_one,
+ .remove = mcast_remove_one
+};
+
+static struct ib_sa_client sa_client;
+static struct workqueue_struct *mcast_wq;
+static union ib_gid mgid0;
+
+struct mcast_device;
+
+struct mcast_port {
+ struct mcast_device *dev;
+ spinlock_t lock;
+ struct rb_root table;
+ atomic_t refcount;
+ struct completion comp;
+ u8 port_num;
+};
+
+struct mcast_device {
+ struct ib_device *device;
+ struct ib_event_handler event_handler;
+ int start_port;
+ int end_port;
+ struct mcast_port port[0];
+};
+
+enum mcast_state {
+ MCAST_IDLE,
+ MCAST_JOINING,
+ MCAST_MEMBER,
+ MCAST_BUSY,
+ MCAST_ERROR
+};
+
+struct mcast_member;
+
+struct mcast_group {
+ struct ib_sa_mcmember_rec rec;
+ struct rb_node node;
+ struct mcast_port *port;
+ spinlock_t lock;
+ struct work_struct work;
+ struct list_head pending_list;
+ struct list_head active_list;
+ struct mcast_member *last_join;
+ int members[3];
+ atomic_t refcount;
+ enum mcast_state state;
+ struct ib_sa_query *query;
+ int query_id;
+};
+
+struct mcast_member {
+ struct ib_sa_multicast multicast;
+ struct ib_sa_client *client;
+ struct mcast_group *group;
+ struct list_head list;
+ enum mcast_state state;
+ atomic_t refcount;
+ struct completion comp;
+};
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context);
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context);
+
+static struct mcast_group *mcast_find(struct mcast_port *port,
+ union ib_gid *mgid)
+{
+ struct rb_node *node = port->table.rb_node;
+ struct mcast_group *group;
+ int ret;
+
+ while (node) {
+ group = rb_entry(node, struct mcast_group, node);
+ ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+ if (!ret)
+ return group;
+
+ if (ret < 0)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
+static struct mcast_group *mcast_insert(struct mcast_port *port,
+ struct mcast_group *group,
+ int allow_duplicates)
+{
+ struct rb_node **link = &port->table.rb_node;
+ struct rb_node *parent = NULL;
+ struct mcast_group *cur_group;
+ int ret;
+
+ while (*link) {
+ parent = *link;
+ cur_group = rb_entry(parent, struct mcast_group, node);
+
+ ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+ sizeof group->rec.mgid);
+ if (ret < 0)
+ link = &(*link)->rb_left;
+ else if (ret > 0)
+ link = &(*link)->rb_right;
+ else if (allow_duplicates)
+ link = &(*link)->rb_left;
+ else
+ return cur_group;
+ }
+ rb_link_node(&group->node, parent, link);
+ rb_insert_color(&group->node, &port->table);
+ return NULL;
+}
+
+static void deref_port(struct mcast_port *port)
+{
+ if (atomic_dec_and_test(&port->refcount))
+ complete(&port->comp);
+}
+
+static void release_group(struct mcast_group *group)
+{
+ struct mcast_port *port = group->port;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ if (atomic_dec_and_test(&group->refcount)) {
+ rb_erase(&group->node, &port->table);
+ spin_unlock_irqrestore(&port->lock, flags);
+ kfree(group);
+ deref_port(port);
+ } else
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void deref_member(struct mcast_member *member)
+{
+ if (atomic_dec_and_test(&member->refcount))
+ complete(&member->comp);
+}
+
+static void queue_join(struct mcast_member *member)
+{
+ struct mcast_group *group = member->group;
+ unsigned long flags;
+
+ spin_lock_irqsave(&group->lock, flags);
+ list_add(&member->list, &group->pending_list);
+ if (group->state == MCAST_IDLE) {
+ group->state = MCAST_BUSY;
+ atomic_inc(&group->refcount);
+ queue_work(mcast_wq, &group->work);
+ }
+ spin_unlock_irqrestore(&group->lock, flags);
+}
+
+/*
+ * A multicast group has three types of members: full member, non member, and
+ * send only member. We need to keep track of the number of members of each
+ * type based on their join state. Adjust the number of members the belong to
+ * the specified join states.
+ */
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+ int i;
+
+ for (i = 0; i < 3; i++, join_state >>= 1)
+ if (join_state & 0x1)
+ group->members[i] += inc;
+}
+
+/*
+ * If a multicast group has zero members left for a particular join state, but
+ * the group is still a member with the SA, we need to leave that join state.
+ * Determine which join states we still belong to, but that do not have any
+ * active members.
+ */
+static u8 get_leave_state(struct mcast_group *group)
+{
+ u8 leave_state = 0;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (!group->members[i])
+ leave_state |= (0x1 << i);
+
+ return leave_state & group->rec.join_state;
+}
+
+static int check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 selector, u8 src_value, u8 dst_value)
+{
+ int err;
+
+ if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+ return 0;
+
+ switch (selector) {
+ case IB_SA_GT:
+ err = (src_value <= dst_value);
+ break;
+ case IB_SA_LT:
+ err = (src_value >= dst_value);
+ break;
+ case IB_SA_EQ:
+ err = (src_value != dst_value);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
+static int cmp_rec(struct ib_sa_mcmember_rec *src,
+ struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
+{
+ /* MGID must already match */
+
+ if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
+ memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+ return -EINVAL;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+ IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
+ src->mtu, dst->mtu))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+ src->traffic_class != dst->traffic_class)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+ return -EINVAL;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+ IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
+ src->rate, dst->rate))
+ return -EINVAL;
+ if (check_selector(comp_mask,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+ dst->packet_life_time_selector,
+ src->packet_life_time, dst->packet_life_time))
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+ src->flow_label != dst->flow_label)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+ src->hop_limit != dst->hop_limit)
+ return -EINVAL;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
+ return -EINVAL;
+
+ /* join_state checked separately, proxy_join ignored */
+
+ return 0;
+}
+
+static int send_join(struct mcast_group *group, struct mcast_member *member)
+{
+ struct mcast_port *port = group->port;
+ int ret;
+
+ group->last_join = member;
+ ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+ port->port_num, IB_MGMT_METHOD_SET,
+ &member->multicast.rec,
+ member->multicast.comp_mask,
+ 3000, GFP_KERNEL, join_handler, group,
+ &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static int send_leave(struct mcast_group *group, u8 leave_state)
+{
+ struct mcast_port *port = group->port;
+ struct ib_sa_mcmember_rec rec;
+ int ret;
+
+ rec = group->rec;
+ rec.join_state = leave_state;
+
+ ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+ port->port_num, IB_SA_METHOD_DELETE, &rec,
+ IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_JOIN_STATE,
+ 3000, GFP_KERNEL, leave_handler,
+ group, &group->query);
+ if (ret >= 0) {
+ group->query_id = ret;
+ ret = 0;
+ }
+ return ret;
+}
+
+static void join_group(struct mcast_group *group, struct mcast_member *member,
+ u8 join_state)
+{
+ member->state = MCAST_MEMBER;
+ adjust_membership(group, join_state, 1);
+ group->rec.join_state |= join_state;
+ member->multicast.rec = group->rec;
+ member->multicast.rec.join_state = join_state;
+ list_move(&member->list, &group->active_list);
+}
+
+static int fail_join(struct mcast_group *group, struct mcast_member *member,
+ int status)
+{
+ spin_lock_irq(&group->lock);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ return member->multicast.callback(status, &member->multicast);
+}
+
+static void process_group_error(struct mcast_group *group)
+{
+ struct mcast_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->active_list)) {
+ member = list_entry(group->active_list.next,
+ struct mcast_member, list);
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ adjust_membership(group, member->multicast.rec.join_state, -1);
+ member->state = MCAST_ERROR;
+ spin_unlock_irq(&group->lock);
+
+ ret = member->multicast.callback(-ENETRESET,
+ &member->multicast);
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ spin_lock_irq(&group->lock);
+ }
+
+ group->rec.join_state = 0;
+ group->state = MCAST_BUSY;
+ spin_unlock_irq(&group->lock);
+}
+
+static void mcast_work_handler(struct work_struct *work)
+{
+ struct mcast_group *group;
+ struct mcast_member *member;
+ struct ib_sa_multicast *multicast;
+ int status, ret;
+ u8 join_state;
+
+ group = container_of(work, typeof(*group), work);
+retest:
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->pending_list) ||
+ (group->state == MCAST_ERROR)) {
+
+ if (group->state == MCAST_ERROR) {
+ spin_unlock_irq(&group->lock);
+ process_group_error(group);
+ goto retest;
+ }
+
+ member = list_entry(group->pending_list.next,
+ struct mcast_member, list);
+ multicast = &member->multicast;
+ join_state = multicast->rec.join_state;
+ atomic_inc(&member->refcount);
+
+ if (join_state == (group->rec.join_state & join_state)) {
+ status = cmp_rec(&group->rec, &multicast->rec,
+ multicast->comp_mask);
+ if (!status)
+ join_group(group, member, join_state);
+ else
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = multicast->callback(status, multicast);
+ } else {
+ spin_unlock_irq(&group->lock);
+ status = send_join(group, member);
+ if (!status) {
+ deref_member(member);
+ return;
+ }
+ ret = fail_join(group, member, status);
+ }
+
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ spin_lock_irq(&group->lock);
+ }
+
+ join_state = get_leave_state(group);
+ if (join_state) {
+ group->rec.join_state &= ~join_state;
+ spin_unlock_irq(&group->lock);
+ if (send_leave(group, join_state))
+ goto retest;
+ } else {
+ group->state = MCAST_IDLE;
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+}
+
+/*
+ * Fail a join request if it is still active - at the head of the pending queue.
+ */
+static void process_join_error(struct mcast_group *group, int status)
+{
+ struct mcast_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ member = list_entry(group->pending_list.next,
+ struct mcast_member, list);
+ if (group->last_join == member) {
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = member->multicast.callback(status, &member->multicast);
+ deref_member(member);
+ if (ret)
+ ib_sa_free_multicast(&member->multicast);
+ } else
+ spin_unlock_irq(&group->lock);
+}
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context)
+{
+ struct mcast_group *group = context;
+
+ if (status)
+ process_join_error(group, status);
+ else {
+ spin_lock_irq(&group->port->lock);
+ group->rec = *rec;
+ if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+ rb_erase(&group->node, &group->port->table);
+ mcast_insert(group->port, group, 1);
+ }
+ spin_unlock_irq(&group->port->lock);
+ }
+ mcast_work_handler(&group->work);
+}
+
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+ void *context)
+{
+ struct mcast_group *group = context;
+
+ mcast_work_handler(&group->work);
+}
+
+static struct mcast_group *acquire_group(struct mcast_port *port,
+ union ib_gid *mgid, gfp_t gfp_mask)
+{
+ struct mcast_group *group, *cur_group;
+ unsigned long flags;
+ int is_mgid0;
+
+ is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+ if (!is_mgid0) {
+ spin_lock_irqsave(&port->lock, flags);
+ group = mcast_find(port, mgid);
+ if (group)
+ goto found;
+ spin_unlock_irqrestore(&port->lock, flags);
+ }
+
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return NULL;
+
+ group->port = port;
+ group->rec.mgid = *mgid;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->active_list);
+ INIT_WORK(&group->work, mcast_work_handler);
+ spin_lock_init(&group->lock);
+
+ spin_lock_irqsave(&port->lock, flags);
+ cur_group = mcast_insert(port, group, is_mgid0);
+ if (cur_group) {
+ kfree(group);
+ group = cur_group;
+ } else
+ atomic_inc(&port->refcount);
+found:
+ atomic_inc(&group->refcount);
+ spin_unlock_irqrestore(&port->lock, flags);
+ return group;
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier. Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_sa_multicast *
+ib_sa_join_multicast(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_sa_multicast *multicast),
+ void *context)
+{
+ struct mcast_device *dev;
+ struct mcast_member *member;
+ struct ib_sa_multicast *multicast;
+ int ret;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ member = kmalloc(sizeof *member, gfp_mask);
+ if (!member)
+ return ERR_PTR(-ENOMEM);
+
+ ib_sa_client_get(client);
+ member->client = client;
+ member->multicast.rec = *rec;
+ member->multicast.comp_mask = comp_mask;
+ member->multicast.callback = callback;
+ member->multicast.context = context;
+ init_completion(&member->comp);
+ atomic_set(&member->refcount, 1);
+ member->state = MCAST_JOINING;
+
+ member->group = acquire_group(&dev->port[port_num - dev->start_port],
+ &rec->mgid, gfp_mask);
+ if (!member->group) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * The user will get the multicast structure in their callback. They
+ * could then free the multicast structure before we can return from
+ * this routine. So we save the pointer to return before queuing
+ * any callback.
+ */
+ multicast = &member->multicast;
+ queue_join(member);
+ return multicast;
+
+err:
+ ib_sa_client_put(client);
+ kfree(member);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_sa_join_multicast);
+
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+{
+ struct mcast_member *member;
+ struct mcast_group *group;
+
+ member = container_of(multicast, struct mcast_member, multicast);
+ group = member->group;
+
+ spin_lock_irq(&group->lock);
+ if (member->state == MCAST_MEMBER)
+ adjust_membership(group, multicast->rec.join_state, -1);
+
+ list_del_init(&member->list);
+
+ if (group->state == MCAST_IDLE) {
+ group->state = MCAST_BUSY;
+ spin_unlock_irq(&group->lock);
+ /* Continue to hold reference on group until callback */
+ queue_work(mcast_wq, &group->work);
+ } else {
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+
+ deref_member(member);
+ wait_for_completion(&member->comp);
+ ib_sa_client_put(member->client);
+ kfree(member);
+}
+EXPORT_SYMBOL(ib_sa_free_multicast);
+
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+ union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ struct mcast_group *group;
+ unsigned long flags;
+ int ret = 0;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return -ENODEV;
+
+ port = &dev->port[port_num - dev->start_port];
+ spin_lock_irqsave(&port->lock, flags);
+ group = mcast_find(port, mgid);
+ if (group)
+ *rec = group->rec;
+ else
+ ret = -EADDRNOTAVAIL;
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
+
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+ struct ib_sa_mcmember_rec *rec,
+ struct ib_ah_attr *ah_attr)
+{
+ int ret;
+ u16 gid_index;
+ u8 p;
+
+ ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+ if (ret)
+ return ret;
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = be16_to_cpu(rec->mlid);
+ ah_attr->sl = rec->sl;
+ ah_attr->port_num = port_num;
+ ah_attr->static_rate = rec->rate;
+
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.dgid = rec->mgid;
+
+ ah_attr->grh.sgid_index = (u8) gid_index;
+ ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+ ah_attr->grh.hop_limit = rec->hop_limit;
+ ah_attr->grh.traffic_class = rec->traffic_class;
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_mcmember);
+
+static void mcast_groups_lost(struct mcast_port *port)
+{
+ struct mcast_group *group;
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ for (node = rb_first(&port->table); node; node = rb_next(node)) {
+ group = rb_entry(node, struct mcast_group, node);
+ spin_lock(&group->lock);
+ if (group->state == MCAST_IDLE) {
+ atomic_inc(&group->refcount);
+ queue_work(mcast_wq, &group->work);
+ }
+ group->state = MCAST_ERROR;
+ spin_unlock(&group->lock);
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mcast_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct mcast_device *dev;
+
+ dev = container_of(handler, struct mcast_device, event_handler);
+
+ switch (event->event) {
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_LID_CHANGE:
+ case IB_EVENT_SM_CHANGE:
+ case IB_EVENT_CLIENT_REREGISTER:
+ mcast_groups_lost(&dev->port[event->element.port_num -
+ dev->start_port]);
+ break;
+ default:
+ break;
+ }
+}
+
+static void mcast_add_one(struct ib_device *device)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ int i;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+ GFP_KERNEL);
+ if (!dev)
+ return;
+
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ dev->start_port = dev->end_port = 0;
+ else {
+ dev->start_port = 1;
+ dev->end_port = device->phys_port_cnt;
+ }
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ port->dev = dev;
+ port->port_num = dev->start_port + i;
+ spin_lock_init(&port->lock);
+ port->table = RB_ROOT;
+ init_completion(&port->comp);
+ atomic_set(&port->refcount, 1);
+ }
+
+ dev->device = device;
+ ib_set_client_data(device, &mcast_client, dev);
+
+ INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
+ ib_register_event_handler(&dev->event_handler);
+}
+
+static void mcast_remove_one(struct ib_device *device)
+{
+ struct mcast_device *dev;
+ struct mcast_port *port;
+ int i;
+
+ dev = ib_get_client_data(device, &mcast_client);
+ if (!dev)
+ return;
+
+ ib_unregister_event_handler(&dev->event_handler);
+ flush_workqueue(mcast_wq);
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ deref_port(port);
+ wait_for_completion(&port->comp);
+ }
+
+ kfree(dev);
+}
+
+int mcast_init(void)
+{
+ int ret;
+
+ mcast_wq = create_singlethread_workqueue("ib_mcast");
+ if (!mcast_wq)
+ return -ENOMEM;
+
+ ib_sa_register_client(&sa_client);
+
+ ret = ib_register_client(&mcast_client);
+ if (ret)
+ goto err;
+ return 0;
+
+err:
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(mcast_wq);
+ return ret;
+}
+
+void mcast_cleanup(void)
+{
+ ib_unregister_client(&mcast_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(mcast_wq);
+}
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
new file mode 100644
index 000000000000..24c93fd320fb
--- /dev/null
+++ b/drivers/infiniband/core/sa.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef SA_H
+#define SA_H
+
+#include <rdma/ib_sa.h>
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+ atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+ if (atomic_dec_and_test(&client->users))
+ complete(&client->comp);
+}
+
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u8 method,
+ struct ib_sa_mcmember_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_mcmember_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+int mcast_init(void);
+void mcast_cleanup(void);
+
+#endif /* SA_H */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index e45afba75341..68db633711c5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -47,8 +47,8 @@
#include <linux/workqueue.h>
#include <rdma/ib_pack.h>
-#include <rdma/ib_sa.h>
#include <rdma/ib_cache.h>
+#include "sa.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -425,17 +425,6 @@ void ib_sa_register_client(struct ib_sa_client *client)
}
EXPORT_SYMBOL(ib_sa_register_client);
-static inline void ib_sa_client_get(struct ib_sa_client *client)
-{
- atomic_inc(&client->users);
-}
-
-static inline void ib_sa_client_put(struct ib_sa_client *client)
-{
- if (atomic_dec_and_test(&client->users))
- complete(&client->comp);
-}
-
void ib_sa_unregister_client(struct ib_sa_client *client)
{
ib_sa_client_put(client);
@@ -482,6 +471,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->sl = rec->sl;
ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
ah_attr->port_num = port_num;
+ ah_attr->static_rate = rec->rate;
if (rec->hop_limit > 1) {
ah_attr->ah_flags = IB_AH_GRH;
@@ -901,7 +891,6 @@ err1:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@@ -1053,14 +1042,27 @@ static int __init ib_sa_init(void)
get_random_bytes(&tid, sizeof tid);
ret = ib_register_client(&sa_client);
- if (ret)
+ if (ret) {
printk(KERN_ERR "Couldn't register ib_sa client\n");
+ goto err1;
+ }
+
+ ret = mcast_init();
+ if (ret) {
+ printk(KERN_ERR "Couldn't initialize multicast handling\n");
+ goto err2;
+ }
+ return 0;
+err2:
+ ib_unregister_client(&sa_client);
+err1:
return ret;
}
static void __exit ib_sa_cleanup(void)
{
+ mcast_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 709323c14c5d..000c086bf2e9 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -714,8 +714,6 @@ int ib_device_register_sysfs(struct ib_device *device)
if (ret)
goto err_put;
} else {
- int i;
-
for (i = 1; i <= device->phys_port_cnt; ++i) {
ret = add_port(device, i);
if (ret)
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f15220a0ee75..ee51d79a7ad5 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1221,7 +1221,7 @@ static void ib_ucm_release_class_dev(struct class_device *class_dev)
kfree(dev);
}
-static struct file_operations ucm_fops = {
+static const struct file_operations ucm_fops = {
.owner = THIS_MODULE,
.open = ib_ucm_open,
.release = ib_ucm_close,
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e2e8d329b443..b516b93b8550 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -70,10 +70,24 @@ struct ucma_context {
u64 uid;
struct list_head list;
+ struct list_head mc_list;
+};
+
+struct ucma_multicast {
+ struct ucma_context *ctx;
+ int id;
+ int events_reported;
+
+ u64 uid;
+ struct list_head list;
+ struct sockaddr addr;
+ u8 pad[sizeof(struct sockaddr_in6) -
+ sizeof(struct sockaddr)];
};
struct ucma_event {
struct ucma_context *ctx;
+ struct ucma_multicast *mc;
struct list_head list;
struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
@@ -81,6 +95,7 @@ struct ucma_event {
static DEFINE_MUTEX(mut);
static DEFINE_IDR(ctx_idr);
+static DEFINE_IDR(multicast_idr);
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
@@ -124,6 +139,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
atomic_set(&ctx->ref, 1);
init_completion(&ctx->comp);
+ INIT_LIST_HEAD(&ctx->mc_list);
ctx->file = file;
do {
@@ -147,6 +163,37 @@ error:
return NULL;
}
+static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+{
+ struct ucma_multicast *mc;
+ int ret;
+
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
+ if (!mc)
+ return NULL;
+
+ do {
+ ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
+ if (!ret)
+ goto error;
+
+ mutex_lock(&mut);
+ ret = idr_get_new(&multicast_idr, mc, &mc->id);
+ mutex_unlock(&mut);
+ } while (ret == -EAGAIN);
+
+ if (ret)
+ goto error;
+
+ mc->ctx = ctx;
+ list_add_tail(&mc->list, &ctx->mc_list);
+ return mc;
+
+error:
+ kfree(mc);
+ return NULL;
+}
+
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
struct rdma_conn_param *src)
{
@@ -180,8 +227,19 @@ static void ucma_set_event_context(struct ucma_context *ctx,
struct ucma_event *uevent)
{
uevent->ctx = ctx;
- uevent->resp.uid = ctx->uid;
- uevent->resp.id = ctx->id;
+ switch (event->event) {
+ case RDMA_CM_EVENT_MULTICAST_JOIN:
+ case RDMA_CM_EVENT_MULTICAST_ERROR:
+ uevent->mc = (struct ucma_multicast *)
+ event->param.ud.private_data;
+ uevent->resp.uid = uevent->mc->uid;
+ uevent->resp.id = uevent->mc->id;
+ break;
+ default:
+ uevent->resp.uid = ctx->uid;
+ uevent->resp.id = ctx->id;
+ break;
+ }
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -199,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
ucma_set_event_context(ctx, event, uevent);
uevent->resp.event = event->event;
uevent->resp.status = event->status;
- if (cm_id->ps == RDMA_PS_UDP)
+ if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
else
ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -290,6 +348,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
list_del(&uevent->list);
uevent->ctx->events_reported++;
+ if (uevent->mc)
+ uevent->mc->events_reported++;
kfree(uevent);
done:
mutex_unlock(&file->mut);
@@ -342,6 +402,19 @@ err1:
return ret;
}
+static void ucma_cleanup_multicast(struct ucma_context *ctx)
+{
+ struct ucma_multicast *mc, *tmp;
+
+ mutex_lock(&mut);
+ list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+ list_del(&mc->list);
+ idr_remove(&multicast_idr, mc->id);
+ kfree(mc);
+ }
+ mutex_unlock(&mut);
+}
+
static void ucma_cleanup_events(struct ucma_context *ctx)
{
struct ucma_event *uevent, *tmp;
@@ -360,6 +433,19 @@ static void ucma_cleanup_events(struct ucma_context *ctx)
}
}
+static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
+{
+ struct ucma_event *uevent, *tmp;
+
+ list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
+ if (uevent->mc != mc)
+ continue;
+
+ list_del(&uevent->list);
+ kfree(uevent);
+ }
+}
+
static int ucma_free_ctx(struct ucma_context *ctx)
{
int events_reported;
@@ -367,6 +453,8 @@ static int ucma_free_ctx(struct ucma_context *ctx)
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
+ ucma_cleanup_multicast(ctx);
+
/* Cleanup events not yet reported to the user. */
mutex_lock(&ctx->file->mut);
ucma_cleanup_events(ctx);
@@ -731,6 +819,114 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
return ret;
}
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_join_mcast cmd;
+ struct rdma_ucm_create_id_resp resp;
+ struct ucma_context *ctx;
+ struct ucma_multicast *mc;
+ int ret;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ mutex_lock(&file->mut);
+ mc = ucma_alloc_multicast(ctx);
+ if (IS_ERR(mc)) {
+ ret = PTR_ERR(mc);
+ goto err1;
+ }
+
+ mc->uid = cmd.uid;
+ memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+ ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc);
+ if (ret)
+ goto err2;
+
+ resp.id = mc->id;
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp))) {
+ ret = -EFAULT;
+ goto err3;
+ }
+
+ mutex_unlock(&file->mut);
+ ucma_put_ctx(ctx);
+ return 0;
+
+err3:
+ rdma_leave_multicast(ctx->cm_id, &mc->addr);
+ ucma_cleanup_mc_events(mc);
+err2:
+ mutex_lock(&mut);
+ idr_remove(&multicast_idr, mc->id);
+ mutex_unlock(&mut);
+ list_del(&mc->list);
+ kfree(mc);
+err1:
+ mutex_unlock(&file->mut);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_leave_multicast(struct ucma_file *file,
+ const char __user *inbuf,
+ int in_len, int out_len)
+{
+ struct rdma_ucm_destroy_id cmd;
+ struct rdma_ucm_destroy_id_resp resp;
+ struct ucma_multicast *mc;
+ int ret = 0;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ mutex_lock(&mut);
+ mc = idr_find(&multicast_idr, cmd.id);
+ if (!mc)
+ mc = ERR_PTR(-ENOENT);
+ else if (mc->ctx->file != file)
+ mc = ERR_PTR(-EINVAL);
+ else {
+ idr_remove(&multicast_idr, mc->id);
+ atomic_inc(&mc->ctx->ref);
+ }
+ mutex_unlock(&mut);
+
+ if (IS_ERR(mc)) {
+ ret = PTR_ERR(mc);
+ goto out;
+ }
+
+ rdma_leave_multicast(mc->ctx->cm_id, &mc->addr);
+ mutex_lock(&mc->ctx->file->mut);
+ ucma_cleanup_mc_events(mc);
+ list_del(&mc->list);
+ mutex_unlock(&mc->ctx->file->mut);
+
+ ucma_put_ctx(mc->ctx);
+ resp.events_reported = mc->events_reported;
+ kfree(mc);
+
+ if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ &resp, sizeof(resp)))
+ ret = -EFAULT;
+out:
+ return ret;
+}
+
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len) = {
@@ -750,6 +946,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
[RDMA_USER_CM_CMD_GET_OPTION] = NULL,
[RDMA_USER_CM_CMD_SET_OPTION] = NULL,
[RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
+ [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
@@ -833,7 +1031,7 @@ static int ucma_close(struct inode *inode, struct file *filp)
return 0;
}
-static struct file_operations ucma_fops = {
+static const struct file_operations ucma_fops = {
.owner = THIS_MODULE,
.open = ucma_open,
.release = ucma_close,
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 807fbd6b8414..c069ebeba8e3 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -771,7 +771,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
return 0;
}
-static struct file_operations umad_fops = {
+static const struct file_operations umad_fops = {
.owner = THIS_MODULE,
.read = ib_umad_read,
.write = ib_umad_write,
@@ -846,7 +846,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
return ret;
}
-static struct file_operations umad_sm_fops = {
+static const struct file_operations umad_sm_fops = {
.owner = THIS_MODULE,
.open = ib_umad_sm_open,
.release = ib_umad_sm_close
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 743247ec065e..df1efbc10882 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -933,7 +933,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
resp->wc[i].vendor_err = wc[i].vendor_err;
resp->wc[i].byte_len = wc[i].byte_len;
resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data;
- resp->wc[i].qp_num = wc[i].qp_num;
+ resp->wc[i].qp_num = wc[i].qp->qp_num;
resp->wc[i].src_qp = wc[i].src_qp;
resp->wc[i].wc_flags = wc[i].wc_flags;
resp->wc[i].pkey_index = wc[i].pkey_index;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index a617ca7b6923..f8bc822a3cc3 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -375,7 +375,7 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
return 0;
}
-static struct file_operations uverbs_event_fops = {
+static const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
@@ -679,14 +679,14 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
return 0;
}
-static struct file_operations uverbs_fops = {
+static const struct file_operations uverbs_fops = {
.owner = THIS_MODULE,
.write = ib_uverbs_write,
.open = ib_uverbs_open,
.release = ib_uverbs_close
};
-static struct file_operations uverbs_mmap_fops = {
+static const struct file_operations uverbs_mmap_fops = {
.owner = THIS_MODULE,
.write = ib_uverbs_write,
.mmap = ib_uverbs_mmap,
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 27fe242ed435..59243d9aedd6 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -1073,7 +1073,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
0xffffc000) / sizeof(struct c2_rxp_desc);
/* Request an interrupt line for the driver */
- ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
+ ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev);
if (ret) {
printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
pci_name(pcidev), pcidev->irq);
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 05c9154d46f4..5175c99ee586 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -153,7 +153,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
entry->wr_id = ce->hdr.context;
- entry->qp_num = ce->handle;
+ entry->qp = &qp->ibqp;
entry->wc_flags = 0;
entry->slid = 0;
entry->sl = 0;
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
new file mode 100644
index 000000000000..77977f55dca3
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -0,0 +1,27 @@
+config INFINIBAND_CXGB3
+ tristate "Chelsio RDMA Driver"
+ depends on CHELSIO_T3 && INFINIBAND && INET
+ select GENERIC_ALLOCATOR
+ ---help---
+ This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
+ 10GbE adapters.
+
+ For general information about Chelsio and our products, visit
+ our website at <http://www.chelsio.com>.
+
+ For customer support, please visit our customer support page at
+ <http://www.chelsio.com/support.htm>.
+
+ Please send feedback to <linux-bugs@chelsio.com>.
+
+ To compile this driver as a module, choose M here: the module
+ will be called iw_cxgb3.
+
+config INFINIBAND_CXGB3_DEBUG
+ bool "Verbose debugging output"
+ depends on INFINIBAND_CXGB3
+ default n
+ ---help---
+ This option causes the Chelsio RDMA driver to produce copious
+ amounts of debug messages. Select this if you are developing
+ the driver or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
new file mode 100644
index 000000000000..0e110f32f128
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -0,0 +1,12 @@
+EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 \
+ -I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core
+
+obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
+
+iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
+ iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
+
+ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+iw_cxgb3-y += cxio_dbg.o
+endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
new file mode 100644
index 000000000000..75f7b16a271d
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef DEBUG
+#include <linux/types.h>
+#include "common.h"
+#include "cxgb3_ioctl.h"
+#include "cxio_hal.h"
+#include "cxio_wr.h"
+
+void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
+{
+ struct ch_mem_range *m;
+ u64 *data;
+ int rc;
+ int size = 32;
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
+ m->len = size;
+ PDBG("%s TPT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ kfree(m);
+}
+
+void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
+{
+ struct ch_mem_range *m;
+ u64 *data;
+ int rc;
+ int size, npages;
+
+ shift += 12;
+ npages = (len + (1ULL << shift) - 1) >> shift;
+ size = npages * sizeof(u64);
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = pbl_addr;
+ m->len = size;
+ PDBG("%s PBL addr 0x%x len %d depth %d\n",
+ __FUNCTION__, m->addr, m->len, npages);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ kfree(m);
+}
+
+void cxio_dump_wqe(union t3_wr *wqe)
+{
+ __be64 *data = (__be64 *)wqe;
+ uint size = (uint)(be64_to_cpu(*data) & 0xff);
+
+ if (size == 0)
+ size = 8;
+ while (size > 0) {
+ PDBG("WQE %p: %016llx\n", data,
+ (unsigned long long) be64_to_cpu(*data));
+ size--;
+ data++;
+ }
+}
+
+void cxio_dump_wce(struct t3_cqe *wce)
+{
+ __be64 *data = (__be64 *)wce;
+ int size = sizeof(*wce);
+
+ while (size > 0) {
+ PDBG("WCE %p: %016llx\n", data,
+ (unsigned long long) be64_to_cpu(*data));
+ size -= 8;
+ data++;
+ }
+}
+
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
+{
+ struct ch_mem_range *m;
+ int size = nents * 64;
+ u64 *data;
+ int rc;
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
+ m->len = size;
+ PDBG("%s RQT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ kfree(m);
+}
+
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
+{
+ struct ch_mem_range *m;
+ int size = TCB_SIZE;
+ u32 *data;
+ int rc;
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_CM;
+ m->addr = hwtid * size;
+ m->len = size;
+ PDBG("%s TCB %d len %d\n", __FUNCTION__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u32 *)m->buf;
+ while (size > 0) {
+ printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ m->addr,
+ *(data+2), *(data+3), *(data),*(data+1),
+ *(data+6), *(data+7), *(data+4), *(data+5));
+ size -= 32;
+ data += 8;
+ m->addr += 32;
+ }
+ kfree(m);
+}
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
new file mode 100644
index 000000000000..114ac3b775dc
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -0,0 +1,1279 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <asm/delay.h>
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+
+#include "cxio_resource.h"
+#include "cxio_hal.h"
+#include "cxgb3_offload.h"
+#include "sge_defs.h"
+
+static LIST_HEAD(rdev_list);
+static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
+
+static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
+{
+ struct cxio_rdev *rdev;
+
+ list_for_each_entry(rdev, &rdev_list, entry)
+ if (!strcmp(rdev->dev_name, dev_name))
+ return rdev;
+ return NULL;
+}
+
+static inline struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev
+ *tdev)
+{
+ struct cxio_rdev *rdev;
+
+ list_for_each_entry(rdev, &rdev_list, entry)
+ if (rdev->t3cdev_p == tdev)
+ return rdev;
+ return NULL;
+}
+
+int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
+ enum t3_cq_opcode op, u32 credit)
+{
+ int ret;
+ struct t3_cqe *cqe;
+ u32 rptr;
+
+ struct rdma_cq_op setup;
+ setup.id = cq->cqid;
+ setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
+ setup.op = op;
+ ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
+
+ if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
+ return ret;
+
+ /*
+ * If the rearm returned an index other than our current index,
+ * then there might be CQE's in flight (being DMA'd). We must wait
+ * here for them to complete or the consumer can miss a notification.
+ */
+ if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
+ int i=0;
+
+ rptr = cq->rptr;
+
+ /*
+ * Keep the generation correct by bumping rptr until it
+ * matches the index returned by the rearm - 1.
+ */
+ while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
+ rptr++;
+
+ /*
+ * Now rptr is the index for the (last) cqe that was
+ * in-flight at the time the HW rearmed the CQ. We
+ * spin until that CQE is valid.
+ */
+ cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
+ while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
+ udelay(1);
+ if (i++ > 1000000) {
+ BUG_ON(1);
+ printk(KERN_ERR "%s: stalled rnic\n",
+ rdev_p->dev_name);
+ return -EIO;
+ }
+ }
+ }
+ return 0;
+}
+
+static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
+{
+ struct rdma_cq_setup setup;
+ setup.id = cqid;
+ setup.base_addr = 0; /* NULL address */
+ setup.size = 0; /* disaable the CQ */
+ setup.credits = 0;
+ setup.credit_thres = 0;
+ setup.ovfl_mode = 0;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
+{
+ u64 sge_cmd;
+ struct t3_modify_qp_wr *wqe;
+ struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+ if (!skb) {
+ PDBG("%s alloc_skb failed\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+ memset(wqe, 0, sizeof(*wqe));
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7);
+ wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
+ sge_cmd = qpid << 8 | 3;
+ wqe->sge_cmd = cpu_to_be64(sge_cmd);
+ skb->priority = CPL_PRIORITY_CONTROL;
+ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+}
+
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ struct rdma_cq_setup setup;
+ int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
+
+ cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
+ if (!cq->cqid)
+ return -ENOMEM;
+ cq->sw_queue = kzalloc(size, GFP_KERNEL);
+ if (!cq->sw_queue)
+ return -ENOMEM;
+ cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << (cq->size_log2)) *
+ sizeof(struct t3_cqe),
+ &(cq->dma_addr), GFP_KERNEL);
+ if (!cq->queue) {
+ kfree(cq->sw_queue);
+ return -ENOMEM;
+ }
+ pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+ memset(cq->queue, 0, size);
+ setup.id = cq->cqid;
+ setup.base_addr = (u64) (cq->dma_addr);
+ setup.size = 1UL << cq->size_log2;
+ setup.credits = 65535;
+ setup.credit_thres = 1;
+ if (rdev_p->t3cdev_p->type == T3B)
+ setup.ovfl_mode = 0;
+ else
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ struct rdma_cq_setup setup;
+ setup.id = cq->cqid;
+ setup.base_addr = (u64) (cq->dma_addr);
+ setup.size = 1UL << cq->size_log2;
+ setup.credits = setup.size;
+ setup.credit_thres = setup.size; /* TBD: overflow recovery */
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ struct cxio_qpid_list *entry;
+ u32 qpid;
+ int i;
+
+ mutex_lock(&uctx->lock);
+ if (!list_empty(&uctx->qpids)) {
+ entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
+ entry);
+ list_del(&entry->entry);
+ qpid = entry->qpid;
+ kfree(entry);
+ } else {
+ qpid = cxio_hal_get_qpid(rdev_p->rscp);
+ if (!qpid)
+ goto out;
+ for (i = qpid+1; i & rdev_p->qpmask; i++) {
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ break;
+ entry->qpid = i;
+ list_add_tail(&entry->entry, &uctx->qpids);
+ }
+ }
+out:
+ mutex_unlock(&uctx->lock);
+ PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+ return qpid;
+}
+
+static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
+ struct cxio_ucontext *uctx)
+{
+ struct cxio_qpid_list *entry;
+
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ return;
+ PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+ entry->qpid = qpid;
+ mutex_lock(&uctx->lock);
+ list_add_tail(&entry->entry, &uctx->qpids);
+ mutex_unlock(&uctx->lock);
+}
+
+void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ struct list_head *pos, *nxt;
+ struct cxio_qpid_list *entry;
+
+ mutex_lock(&uctx->lock);
+ list_for_each_safe(pos, nxt, &uctx->qpids) {
+ entry = list_entry(pos, struct cxio_qpid_list, entry);
+ list_del_init(&entry->entry);
+ if (!(entry->qpid & rdev_p->qpmask))
+ cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
+ kfree(entry);
+ }
+ mutex_unlock(&uctx->lock);
+}
+
+void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ INIT_LIST_HEAD(&uctx->qpids);
+ mutex_init(&uctx->lock);
+}
+
+int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
+ struct t3_wq *wq, struct cxio_ucontext *uctx)
+{
+ int depth = 1UL << wq->size_log2;
+ int rqsize = 1UL << wq->rq_size_log2;
+
+ wq->qpid = get_qpid(rdev_p, uctx);
+ if (!wq->qpid)
+ return -ENOMEM;
+
+ wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL);
+ if (!wq->rq)
+ goto err1;
+
+ wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
+ if (!wq->rq_addr)
+ goto err2;
+
+ wq->sq = kzalloc(depth * sizeof(struct t3_swsq), GFP_KERNEL);
+ if (!wq->sq)
+ goto err3;
+
+ wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+ depth * sizeof(union t3_wr),
+ &(wq->dma_addr), GFP_KERNEL);
+ if (!wq->queue)
+ goto err4;
+
+ memset(wq->queue, 0, depth * sizeof(union t3_wr));
+ pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+ wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
+ if (!kernel_domain)
+ wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
+ (wq->qpid << rdev_p->qpshift);
+ PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __FUNCTION__,
+ wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
+ return 0;
+err4:
+ kfree(wq->sq);
+err3:
+ cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
+err2:
+ kfree(wq->rq);
+err1:
+ put_qpid(rdev_p, wq->qpid, uctx);
+ return -ENOMEM;
+}
+
+int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ int err;
+ err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
+ kfree(cq->sw_queue);
+ dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << (cq->size_log2))
+ * sizeof(struct t3_cqe), cq->queue,
+ pci_unmap_addr(cq, mapping));
+ cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
+ return err;
+}
+
+int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
+ struct cxio_ucontext *uctx)
+{
+ dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << (wq->size_log2))
+ * sizeof(union t3_wr), wq->queue,
+ pci_unmap_addr(wq, mapping));
+ kfree(wq->sq);
+ cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
+ kfree(wq->rq);
+ put_qpid(rdev_p, wq->qpid, uctx);
+ return 0;
+}
+
+static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
+{
+ struct t3_cqe cqe;
+
+ PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+ V_CQE_OPCODE(T3_SEND) |
+ V_CQE_TYPE(0) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->qpid) |
+ V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+ cq->size_log2)));
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+ cq->sw_wptr++;
+}
+
+void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+ u32 ptr;
+
+ PDBG("%s wq %p cq %p\n", __FUNCTION__, wq, cq);
+
+ /* flush RQ */
+ PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __FUNCTION__,
+ wq->rq_rptr, wq->rq_wptr, count);
+ ptr = wq->rq_rptr + count;
+ while (ptr++ != wq->rq_wptr)
+ insert_recv_cqe(wq, cq);
+}
+
+static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
+ struct t3_swsq *sqp)
+{
+ struct t3_cqe cqe;
+
+ PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+ V_CQE_OPCODE(sqp->opcode) |
+ V_CQE_TYPE(1) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->qpid) |
+ V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+ cq->size_log2)));
+ cqe.u.scqe.wrid_hi = sqp->sq_wptr;
+
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+ cq->sw_wptr++;
+}
+
+void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+ __u32 ptr;
+ struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
+
+ ptr = wq->sq_rptr + count;
+ sqp += count;
+ while (ptr != wq->sq_wptr) {
+ insert_sq_cqe(wq, cq, sqp);
+ sqp++;
+ ptr++;
+ }
+}
+
+/*
+ * Move all CQEs from the HWCQ into the SWCQ.
+ */
+void cxio_flush_hw_cq(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe, *swcqe;
+
+ PDBG("%s cq %p cqid 0x%x\n", __FUNCTION__, cq, cq->cqid);
+ cqe = cxio_next_hw_cqe(cq);
+ while (cqe) {
+ PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
+ __FUNCTION__, cq->rptr, cq->sw_wptr);
+ swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
+ *swcqe = *cqe;
+ swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+ cq->sw_wptr++;
+ cq->rptr++;
+ cqe = cxio_next_hw_cqe(cq);
+ }
+}
+
+static inline int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
+{
+ if (CQE_OPCODE(*cqe) == T3_TERMINATE)
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+ Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
+ return 0;
+
+ return 1;
+}
+
+void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+ struct t3_cqe *cqe;
+ u32 ptr;
+
+ *count = 0;
+ ptr = cq->sw_rptr;
+ while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+ if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+ (CQE_QPID(*cqe) == wq->qpid))
+ (*count)++;
+ ptr++;
+ }
+ PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
+}
+
+void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+ struct t3_cqe *cqe;
+ u32 ptr;
+
+ *count = 0;
+ PDBG("%s count zero %d\n", __FUNCTION__, *count);
+ ptr = cq->sw_rptr;
+ while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+ if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
+ (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
+ (*count)++;
+ ptr++;
+ }
+ PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
+}
+
+static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
+{
+ struct rdma_cq_setup setup;
+ setup.id = 0;
+ setup.base_addr = 0; /* NULL address */
+ setup.size = 1; /* enable the CQ */
+ setup.credits = 0;
+
+ /* force SGE to redirect to RspQ and interrupt */
+ setup.credit_thres = 0;
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+ int err;
+ u64 sge_cmd, ctx0, ctx1;
+ u64 base_addr;
+ struct t3_modify_qp_wr *wqe;
+ struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+
+
+ if (!skb) {
+ PDBG("%s alloc_skb failed\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ err = cxio_hal_init_ctrl_cq(rdev_p);
+ if (err) {
+ PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err);
+ return err;
+ }
+ rdev_p->ctrl_qp.workq = dma_alloc_coherent(
+ &(rdev_p->rnic_info.pdev->dev),
+ (1 << T3_CTRL_QP_SIZE_LOG2) *
+ sizeof(union t3_wr),
+ &(rdev_p->ctrl_qp.dma_addr),
+ GFP_KERNEL);
+ if (!rdev_p->ctrl_qp.workq) {
+ PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
+ rdev_p->ctrl_qp.dma_addr);
+ rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
+ memset(rdev_p->ctrl_qp.workq, 0,
+ (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
+
+ mutex_init(&rdev_p->ctrl_qp.lock);
+ init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
+
+ /* update HW Ctrl QP context */
+ base_addr = rdev_p->ctrl_qp.dma_addr;
+ base_addr >>= 12;
+ ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
+ V_EC_BASE_LO((u32) base_addr & 0xffff));
+ ctx0 <<= 32;
+ ctx0 |= V_EC_CREDITS(FW_WR_NUM);
+ base_addr >>= 16;
+ ctx1 = (u32) base_addr;
+ base_addr >>= 32;
+ ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
+ V_EC_TYPE(0) | V_EC_GEN(1) |
+ V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
+ wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+ memset(wqe, 0, sizeof(*wqe));
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1,
+ T3_CTL_QP_TID, 7);
+ wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
+ sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
+ wqe->sge_cmd = cpu_to_be64(sge_cmd);
+ wqe->ctx1 = cpu_to_be64(ctx1);
+ wqe->ctx0 = cpu_to_be64(ctx0);
+ PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
+ (unsigned long long) rdev_p->ctrl_qp.dma_addr,
+ rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
+ skb->priority = CPL_PRIORITY_CONTROL;
+ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+}
+
+static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+ dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << T3_CTRL_QP_SIZE_LOG2)
+ * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
+ pci_unmap_addr(&rdev_p->ctrl_qp, mapping));
+ return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
+}
+
+/* write len bytes of data into addr (32B aligned address)
+ * If data is NULL, clear len byte of memory to zero.
+ * caller aquires the ctrl_qp lock before the call
+ */
+static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+ u32 len, void *data, int completion)
+{
+ u32 i, nr_wqe, copy_len;
+ u8 *copy_data;
+ u8 wr_len, utx_len; /* lenght in 8 byte flit */
+ enum t3_wr_flags flag;
+ __be64 *wqe;
+ u64 utx_cmd;
+ addr &= 0x7FFFFFF;
+ nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
+ PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
+ __FUNCTION__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+ nr_wqe, data, addr);
+ utx_len = 3; /* in 32B unit */
+ for (i = 0; i < nr_wqe; i++) {
+ if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2)) {
+ PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
+ "wait for more space i %d\n", __FUNCTION__,
+ rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
+ if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+ !Q_FULL(rdev_p->ctrl_qp.rptr,
+ rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2))) {
+ PDBG("%s ctrl_qp workq interrupted\n",
+ __FUNCTION__);
+ return -ERESTARTSYS;
+ }
+ PDBG("%s ctrl_qp wakeup, continue posting work request "
+ "i %d\n", __FUNCTION__, i);
+ }
+ wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+ (1 << T3_CTRL_QP_SIZE_LOG2)));
+ flag = 0;
+ if (i == (nr_wqe - 1)) {
+ /* last WQE */
+ flag = completion ? T3_COMPLETION_FLAG : 0;
+ if (len % 32)
+ utx_len = len / 32 + 1;
+ else
+ utx_len = len / 32;
+ }
+
+ /*
+ * Force a CQE to return the credit to the workq in case
+ * we posted more than half the max QP size of WRs
+ */
+ if ((i != 0) &&
+ (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
+ flag = T3_COMPLETION_FLAG;
+ PDBG("%s force completion at i %d\n", __FUNCTION__, i);
+ }
+
+ /* build the utx mem command */
+ wqe += (sizeof(struct t3_bypass_wr) >> 3);
+ utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
+ utx_cmd <<= 32;
+ utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
+ *wqe = cpu_to_be64(utx_cmd);
+ wqe++;
+ copy_data = (u8 *) data + i * 96;
+ copy_len = len > 96 ? 96 : len;
+
+ /* clear memory content if data is NULL */
+ if (data)
+ memcpy(wqe, copy_data, copy_len);
+ else
+ memset(wqe, 0, copy_len);
+ if (copy_len % 32)
+ memset(((u8 *) wqe) + copy_len, 0,
+ 32 - (copy_len % 32));
+ wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
+ (utx_len << 2);
+ wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+ (1 << T3_CTRL_QP_SIZE_LOG2)));
+
+ /* wptr in the WRID[31:0] */
+ ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
+
+ /*
+ * This must be the last write with a memory barrier
+ * for the genbit
+ */
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
+ Q_GENBIT(rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
+ wr_len);
+ if (flag == T3_COMPLETION_FLAG)
+ ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
+ len -= 96;
+ rdev_p->ctrl_qp.wptr++;
+ }
+ return 0;
+}
+
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
+ * OUT: stag index, actual pbl_size, pbl_addr allocated.
+ * TBD: shared memory region support
+ */
+static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+ u32 *stag, u8 stag_state, u32 pdid,
+ enum tpt_mem_type type, enum tpt_mem_perm perm,
+ u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
+ u32 *pbl_size, u32 *pbl_addr)
+{
+ int err;
+ struct tpt_entry tpt;
+ u32 stag_idx;
+ u32 wptr;
+ int rereg = (*stag != T3_STAG_UNSET);
+
+ stag_state = stag_state > 0;
+ stag_idx = (*stag) >> 8;
+
+ if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
+ stag_idx = cxio_hal_get_stag(rdev_p->rscp);
+ if (!stag_idx)
+ return -ENOMEM;
+ *stag = (stag_idx << 8) | ((*stag) & 0xFF);
+ }
+ PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __FUNCTION__, stag_state, type, pdid, stag_idx);
+
+ if (reset_tpt_entry)
+ cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
+ else if (!rereg) {
+ *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
+ if (!*pbl_addr) {
+ return -ENOMEM;
+ }
+ }
+
+ mutex_lock(&rdev_p->ctrl_qp.lock);
+
+ /* write PBL first if any - update pbl only if pbl list exist */
+ if (pbl) {
+
+ PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
+ *pbl_size);
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+ (*pbl_addr >> 5),
+ (*pbl_size << 3), pbl, 0);
+ if (err)
+ goto ret;
+ }
+
+ /* write TPT entry */
+ if (reset_tpt_entry)
+ memset(&tpt, 0, sizeof(tpt));
+ else {
+ tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
+ V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
+ V_TPT_STAG_STATE(stag_state) |
+ V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
+ BUG_ON(page_size >= 28);
+ tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
+ F_TPT_MW_BIND_ENABLE |
+ V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+ V_TPT_PAGE_SIZE(page_size));
+ tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
+ cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+ tpt.len = cpu_to_be32(len);
+ tpt.va_hi = cpu_to_be32((u32) (to >> 32));
+ tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
+ tpt.rsvd_bind_cnt_or_pstag = 0;
+ tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
+ cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+ }
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+ stag_idx +
+ (rdev_p->rnic_info.tpt_base >> 5),
+ sizeof(tpt), &tpt, 1);
+
+ /* release the stag index to free pool */
+ if (reset_tpt_entry)
+ cxio_hal_put_stag(rdev_p->rscp, stag_idx);
+ret:
+ wptr = rdev_p->ctrl_qp.wptr;
+ mutex_unlock(&rdev_p->ctrl_qp.lock);
+ if (!err)
+ if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+ SEQ32_GE(rdev_p->ctrl_qp.rptr,
+ wptr)))
+ return -ERESTARTSYS;
+ return err;
+}
+
+/* IN : stag key, pdid, pbl_size
+ * Out: stag index, actaul pbl_size, and pbl_addr allocated.
+ */
+int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr)
+{
+ *stag = T3_STAG_UNSET;
+ return (__cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
+ perm, 0, 0ULL, 0, 0, NULL, pbl_size, pbl_addr));
+}
+
+int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr)
+{
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+ zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+}
+
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr)
+{
+ return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+ zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+}
+
+int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
+ u32 pbl_addr)
+{
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+ &pbl_size, &pbl_addr);
+}
+
+int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
+{
+ u32 pbl_size = 0;
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
+ NULL, &pbl_size, NULL);
+}
+
+int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
+{
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+ NULL, NULL);
+}
+
+int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+{
+ struct t3_rdma_init_wr *wqe;
+ struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+ PDBG("%s rdev_p %p\n", __FUNCTION__, rdev_p);
+ wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
+ wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
+ wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
+ V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
+ wqe->wrid.id1 = 0;
+ wqe->qpid = cpu_to_be32(attr->qpid);
+ wqe->pdid = cpu_to_be32(attr->pdid);
+ wqe->scqid = cpu_to_be32(attr->scqid);
+ wqe->rcqid = cpu_to_be32(attr->rcqid);
+ wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
+ wqe->rq_size = cpu_to_be32(attr->rq_size);
+ wqe->mpaattrs = attr->mpaattrs;
+ wqe->qpcaps = attr->qpcaps;
+ wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
+ wqe->flags = cpu_to_be32(attr->flags);
+ wqe->ord = cpu_to_be32(attr->ord);
+ wqe->ird = cpu_to_be32(attr->ird);
+ wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
+ wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
+ wqe->rsvd = 0;
+ skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
+ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+}
+
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+ cxio_ev_cb = ev_cb;
+}
+
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+ cxio_ev_cb = NULL;
+}
+
+static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
+{
+ static int cnt;
+ struct cxio_rdev *rdev_p = NULL;
+ struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
+ PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
+ " se %0x notify %0x cqbranch %0x creditth %0x\n",
+ cnt, __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+ RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
+ RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
+ RSPQ_CREDIT_THRESH(rsp_msg));
+ PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
+ "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
+ if (!rdev_p) {
+ PDBG("%s called by t3cdev %p with null ulp\n", __FUNCTION__,
+ t3cdev_p);
+ return 0;
+ }
+ if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
+ rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
+ wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
+ dev_kfree_skb_irq(skb);
+ } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
+ dev_kfree_skb_irq(skb);
+ else if (cxio_ev_cb)
+ (*cxio_ev_cb) (rdev_p, skb);
+ else
+ dev_kfree_skb_irq(skb);
+ cnt++;
+ return 0;
+}
+
+/* Caller takes care of locking if needed */
+int cxio_rdev_open(struct cxio_rdev *rdev_p)
+{
+ struct net_device *netdev_p = NULL;
+ int err = 0;
+ if (strlen(rdev_p->dev_name)) {
+ if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
+ return -EBUSY;
+ }
+ netdev_p = dev_get_by_name(rdev_p->dev_name);
+ if (!netdev_p) {
+ return -EINVAL;
+ }
+ dev_put(netdev_p);
+ } else if (rdev_p->t3cdev_p) {
+ if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
+ return -EBUSY;
+ }
+ netdev_p = rdev_p->t3cdev_p->lldev;
+ strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
+ T3_MAX_DEV_NAME_LEN);
+ } else {
+ PDBG("%s t3cdev_p or dev_name must be set\n", __FUNCTION__);
+ return -EINVAL;
+ }
+
+ list_add_tail(&rdev_p->entry, &rdev_list);
+
+ PDBG("%s opening rnic dev %s\n", __FUNCTION__, rdev_p->dev_name);
+ memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
+ if (!rdev_p->t3cdev_p)
+ rdev_p->t3cdev_p = T3CDEV(netdev_p);
+ rdev_p->t3cdev_p->ulp = (void *) rdev_p;
+ err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
+ &(rdev_p->rnic_info));
+ if (err) {
+ printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+ __FUNCTION__, rdev_p->t3cdev_p, err);
+ goto err1;
+ }
+ err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
+ &(rdev_p->port_info));
+ if (err) {
+ printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+ __FUNCTION__, rdev_p->t3cdev_p, err);
+ goto err1;
+ }
+
+ /*
+ * qpshift is the number of bits to shift the qpid left in order
+ * to get the correct address of the doorbell for that qp.
+ */
+ cxio_init_ucontext(rdev_p, &rdev_p->uctx);
+ rdev_p->qpshift = PAGE_SHIFT -
+ ilog2(65536 >>
+ ilog2(rdev_p->rnic_info.udbell_len >>
+ PAGE_SHIFT));
+ rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
+ rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
+ PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
+ "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
+ __FUNCTION__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+ rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
+ rdev_p->rnic_info.pbl_base,
+ rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+ rdev_p->rnic_info.rqt_top);
+ PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
+ "qpnr %d qpmask 0x%x\n",
+ rdev_p->rnic_info.udbell_len,
+ rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
+ rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
+
+ err = cxio_hal_init_ctrl_qp(rdev_p);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
+ __FUNCTION__, err);
+ goto err1;
+ }
+ err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
+ 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
+ T3_MAX_NUM_PD);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing hal resources.\n",
+ __FUNCTION__, err);
+ goto err2;
+ }
+ err = cxio_hal_pblpool_create(rdev_p);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
+ __FUNCTION__, err);
+ goto err3;
+ }
+ err = cxio_hal_rqtpool_create(rdev_p);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
+ __FUNCTION__, err);
+ goto err4;
+ }
+ return 0;
+err4:
+ cxio_hal_pblpool_destroy(rdev_p);
+err3:
+ cxio_hal_destroy_resource(rdev_p->rscp);
+err2:
+ cxio_hal_destroy_ctrl_qp(rdev_p);
+err1:
+ list_del(&rdev_p->entry);
+ return err;
+}
+
+void cxio_rdev_close(struct cxio_rdev *rdev_p)
+{
+ if (rdev_p) {
+ cxio_hal_pblpool_destroy(rdev_p);
+ cxio_hal_rqtpool_destroy(rdev_p);
+ list_del(&rdev_p->entry);
+ rdev_p->t3cdev_p->ulp = NULL;
+ cxio_hal_destroy_ctrl_qp(rdev_p);
+ cxio_hal_destroy_resource(rdev_p->rscp);
+ }
+}
+
+int __init cxio_hal_init(void)
+{
+ if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
+ return -ENOMEM;
+ t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
+ return 0;
+}
+
+void __exit cxio_hal_exit(void)
+{
+ struct cxio_rdev *rdev, *tmp;
+
+ t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
+ list_for_each_entry_safe(rdev, tmp, &rdev_list, entry)
+ cxio_rdev_close(rdev);
+ cxio_hal_destroy_rhdl_resource();
+}
+
+static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
+{
+ struct t3_swsq *sqp;
+ __u32 ptr = wq->sq_rptr;
+ int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
+
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ while (count--)
+ if (!sqp->signaled) {
+ ptr++;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ } else if (sqp->complete) {
+
+ /*
+ * Insert this completed cqe into the swcq.
+ */
+ PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
+ __FUNCTION__, Q_PTR2IDX(ptr, wq->sq_size_log2),
+ Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
+ sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
+ = sqp->cqe;
+ cq->sw_wptr++;
+ sqp->signaled = 0;
+ break;
+ } else
+ break;
+}
+
+static inline void create_read_req_cqe(struct t3_wq *wq,
+ struct t3_cqe *hw_cqe,
+ struct t3_cqe *read_cqe)
+{
+ read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
+ read_cqe->len = wq->oldest_read->read_len;
+ read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
+ V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
+ V_CQE_OPCODE(T3_READ_REQ) |
+ V_CQE_TYPE(1));
+}
+
+/*
+ * Return a ptr to the next read wr in the SWSQ or NULL.
+ */
+static inline void advance_oldest_read(struct t3_wq *wq)
+{
+
+ u32 rptr = wq->oldest_read - wq->sq + 1;
+ u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
+
+ while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
+ wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
+
+ if (wq->oldest_read->opcode == T3_READ_REQ)
+ return;
+ rptr++;
+ }
+ wq->oldest_read = NULL;
+}
+
+/*
+ * cxio_poll_cq
+ *
+ * Caller must:
+ * check the validity of the first CQE,
+ * supply the wq assicated with the qpid.
+ *
+ * credit: cq credit to return to sge.
+ * cqe_flushed: 1 iff the CQE is flushed.
+ * cqe: copy of the polled CQE.
+ *
+ * return value:
+ * 0 CQE returned,
+ * -1 CQE skipped, try again.
+ */
+int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit)
+{
+ int ret = 0;
+ struct t3_cqe *hw_cqe, read_cqe;
+
+ *cqe_flushed = 0;
+ *credit = 0;
+ hw_cqe = cxio_next_cqe(cq);
+
+ PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
+ " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __FUNCTION__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+ CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
+ CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
+ CQE_WRID_LOW(*hw_cqe));
+
+ /*
+ * skip cqe's not affiliated with a QP.
+ */
+ if (wq == NULL) {
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /*
+ * Gotta tweak READ completions:
+ * 1) the cqe doesn't contain the sq_wptr from the wr.
+ * 2) opcode not reflected from the wr.
+ * 3) read_len not reflected from the wr.
+ * 4) cq_type is RQ_TYPE not SQ_TYPE.
+ */
+ if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
+
+ /*
+ * Don't write to the HWCQ, so create a new read req CQE
+ * in local memory.
+ */
+ create_read_req_cqe(wq, hw_cqe, &read_cqe);
+ hw_cqe = &read_cqe;
+ advance_oldest_read(wq);
+ }
+
+ /*
+ * T3A: Discard TERMINATE CQEs.
+ */
+ if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
+ ret = -1;
+ wq->error = 1;
+ goto skip_cqe;
+ }
+
+ if (CQE_STATUS(*hw_cqe) || wq->error) {
+ *cqe_flushed = wq->error;
+ wq->error = 1;
+
+ /*
+ * T3A inserts errors into the CQE. We cannot return
+ * these as work completions.
+ */
+ /* incoming write failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
+ && RQ_TYPE(*hw_cqe)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+ /* incoming read request failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /* incoming SEND with no receive posted failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+ Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+ * RECV completion.
+ */
+ if (RQ_TYPE(*hw_cqe)) {
+
+ /*
+ * HW only validates 4 bits of MSN. So we must validate that
+ * the MSN in the SEND is the next expected MSN. If its not,
+ * then we complete this with TPT_ERR_MSN and mark the wq in
+ * error.
+ */
+ if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
+ wq->error = 1;
+ hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
+ goto proc_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+ * If we get here its a send completion.
+ *
+ * Handle out of order completion. These get stuffed
+ * in the SW SQ. Then the SW SQ is walked to move any
+ * now in-order completions into the SW CQ. This handles
+ * 2 cases:
+ * 1) reaping unsignaled WRs when the first subsequent
+ * signaled WR is completed.
+ * 2) out of order read completions.
+ */
+ if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
+ struct t3_swsq *sqp;
+
+ PDBG("%s out of order completion going in swsq at idx %ld\n",
+ __FUNCTION__,
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
+ sqp = wq->sq +
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
+ sqp->cqe = *hw_cqe;
+ sqp->complete = 1;
+ ret = -1;
+ goto flush_wq;
+ }
+
+proc_cqe:
+ *cqe = *hw_cqe;
+
+ /*
+ * Reap the associated WR(s) that are freed up with this
+ * completion.
+ */
+ if (SQ_TYPE(*hw_cqe)) {
+ wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
+ PDBG("%s completing sq idx %ld\n", __FUNCTION__,
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
+ *cookie = (wq->sq +
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
+ wq->sq_rptr++;
+ } else {
+ PDBG("%s completing rq idx %ld\n", __FUNCTION__,
+ Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ wq->rq_rptr++;
+ }
+
+flush_wq:
+ /*
+ * Flush any completed cqes that are now in-order.
+ */
+ flush_completed_wrs(wq, cq);
+
+skip_cqe:
+ if (SW_CQE(*hw_cqe)) {
+ PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
+ __FUNCTION__, cq, cq->cqid, cq->sw_rptr);
+ ++cq->sw_rptr;
+ } else {
+ PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
+ __FUNCTION__, cq, cq->cqid, cq->rptr);
+ ++cq->rptr;
+
+ /*
+ * T3A: compute credits.
+ */
+ if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
+ || ((cq->rptr - cq->wptr) >= 128)) {
+ *credit = cq->rptr - cq->wptr;
+ cq->wptr = cq->rptr;
+ }
+ }
+ return ret;
+}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
new file mode 100644
index 000000000000..8ab04a7c6f6e
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_HAL_H__
+#define __CXIO_HAL_H__
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "t3_cpl.h"
+#include "t3cdev.h"
+#include "cxgb3_ctl_defs.h"
+#include "cxio_wr.h"
+
+#define T3_CTRL_QP_ID FW_RI_SGEEC_START
+#define T3_CTL_QP_TID FW_RI_TID_START
+#define T3_CTRL_QP_SIZE_LOG2 8
+#define T3_CTRL_CQ_ID 0
+
+/* TBD */
+#define T3_MAX_NUM_RI (1<<15)
+#define T3_MAX_NUM_QP (1<<15)
+#define T3_MAX_NUM_CQ (1<<15)
+#define T3_MAX_NUM_PD (1<<15)
+#define T3_MAX_PBL_SIZE 256
+#define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_NUM_STAG (1<<15)
+
+#define T3_STAG_UNSET 0xffffffff
+
+#define T3_MAX_DEV_NAME_LEN 32
+
+struct cxio_hal_ctrl_qp {
+ u32 wptr;
+ u32 rptr;
+ struct mutex lock; /* for the wtpr, can sleep */
+ wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
+ union t3_wr *workq; /* the work request queue */
+ dma_addr_t dma_addr; /* pci bus address of the workq */
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+ void __iomem *doorbell;
+};
+
+struct cxio_hal_resource {
+ struct kfifo *tpt_fifo;
+ spinlock_t tpt_fifo_lock;
+ struct kfifo *qpid_fifo;
+ spinlock_t qpid_fifo_lock;
+ struct kfifo *cqid_fifo;
+ spinlock_t cqid_fifo_lock;
+ struct kfifo *pdid_fifo;
+ spinlock_t pdid_fifo_lock;
+};
+
+struct cxio_qpid_list {
+ struct list_head entry;
+ u32 qpid;
+};
+
+struct cxio_ucontext {
+ struct list_head qpids;
+ struct mutex lock;
+};
+
+struct cxio_rdev {
+ char dev_name[T3_MAX_DEV_NAME_LEN];
+ struct t3cdev *t3cdev_p;
+ struct rdma_info rnic_info;
+ struct adap_ports port_info;
+ struct cxio_hal_resource *rscp;
+ struct cxio_hal_ctrl_qp ctrl_qp;
+ void *ulp;
+ unsigned long qpshift;
+ u32 qpnr;
+ u32 qpmask;
+ struct cxio_ucontext uctx;
+ struct gen_pool *pbl_pool;
+ struct gen_pool *rqt_pool;
+ struct list_head entry;
+};
+
+static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
+{
+ return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
+}
+
+typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
+ struct sk_buff * skb);
+
+#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
+#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
+#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
+#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
+#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
+#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
+#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
+#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
+#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
+
+struct respQ_msg_t {
+ __be32 flags; /* flit 0 */
+ __be32 cq_ptrid;
+ __be64 rsvd; /* flit 1 */
+ struct t3_cqe cqe; /* flits 2-3 */
+};
+
+enum t3_cq_opcode {
+ CQ_ARM_AN = 0x2,
+ CQ_ARM_SE = 0x6,
+ CQ_FORCE_AN = 0x3,
+ CQ_CREDIT_UPDATE = 0x7
+};
+
+int cxio_rdev_open(struct cxio_rdev *rdev);
+void cxio_rdev_close(struct cxio_rdev *rdev);
+int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
+ enum t3_cq_opcode op, u32 credit);
+int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev, u32 qpid);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
+ struct cxio_ucontext *uctx);
+int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
+ struct cxio_ucontext *uctx);
+int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_allocate_stag(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr);
+int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr);
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr);
+int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
+ u32 pbl_addr);
+int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
+int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+u32 cxio_hal_get_rhdl(void);
+void cxio_hal_put_rhdl(u32 rhdl);
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
+int __init cxio_hal_init(void);
+void __exit cxio_hal_exit(void);
+void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_flush_hw_cq(struct t3_cq *cq);
+int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit);
+
+#define MOD "iw_cxgb3: "
+#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
+
+#ifdef DEBUG
+void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
+void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift);
+void cxio_dump_wqe(union t3_wr *wqe);
+void cxio_dump_wce(struct t3_cqe *wce);
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
+#endif
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
new file mode 100644
index 000000000000..65bf577311aa
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* Crude resource management */
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include "cxio_resource.h"
+#include "cxio_hal.h"
+
+static struct kfifo *rhdl_fifo;
+static spinlock_t rhdl_fifo_lock;
+
+#define RANDOM_SIZE 16
+
+static int __cxio_init_resource_fifo(struct kfifo **fifo,
+ spinlock_t *fifo_lock,
+ u32 nr, u32 skip_low,
+ u32 skip_high,
+ int random)
+{
+ u32 i, j, entry = 0, idx;
+ u32 random_bytes;
+ u32 rarray[16];
+ spin_lock_init(fifo_lock);
+
+ *fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
+ if (IS_ERR(*fifo))
+ return -ENOMEM;
+
+ for (i = 0; i < skip_low + skip_high; i++)
+ __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
+ if (random) {
+ j = 0;
+ random_bytes = random32();
+ for (i = 0; i < RANDOM_SIZE; i++)
+ rarray[i] = i + skip_low;
+ for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
+ if (j >= RANDOM_SIZE) {
+ j = 0;
+ random_bytes = random32();
+ }
+ idx = (random_bytes >> (j * 2)) & 0xF;
+ __kfifo_put(*fifo,
+ (unsigned char *) &rarray[idx],
+ sizeof(u32));
+ rarray[idx] = i;
+ j++;
+ }
+ for (i = 0; i < RANDOM_SIZE; i++)
+ __kfifo_put(*fifo,
+ (unsigned char *) &rarray[i],
+ sizeof(u32));
+ } else
+ for (i = skip_low; i < nr - skip_high; i++)
+ __kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
+
+ for (i = 0; i < skip_low + skip_high; i++)
+ kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
+ return 0;
+}
+
+static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
+ u32 nr, u32 skip_low, u32 skip_high)
+{
+ return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+ skip_high, 0));
+}
+
+static int cxio_init_resource_fifo_random(struct kfifo **fifo,
+ spinlock_t * fifo_lock,
+ u32 nr, u32 skip_low, u32 skip_high)
+{
+
+ return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+ skip_high, 1));
+}
+
+static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
+{
+ u32 i;
+
+ spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
+
+ rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
+ GFP_KERNEL,
+ &rdev_p->rscp->qpid_fifo_lock);
+ if (IS_ERR(rdev_p->rscp->qpid_fifo))
+ return -ENOMEM;
+
+ for (i = 16; i < T3_MAX_NUM_QP; i++)
+ if (!(i & rdev_p->qpmask))
+ __kfifo_put(rdev_p->rscp->qpid_fifo,
+ (unsigned char *) &i, sizeof(u32));
+ return 0;
+}
+
+int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
+{
+ return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
+ 0);
+}
+
+void cxio_hal_destroy_rhdl_resource(void)
+{
+ kfifo_free(rhdl_fifo);
+}
+
+/* nr_* must be power of 2 */
+int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+ u32 nr_tpt, u32 nr_pbl,
+ u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
+{
+ int err = 0;
+ struct cxio_hal_resource *rscp;
+
+ rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
+ if (!rscp)
+ return -ENOMEM;
+ rdev_p->rscp = rscp;
+ err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
+ &rscp->tpt_fifo_lock,
+ nr_tpt, 1, 0);
+ if (err)
+ goto tpt_err;
+ err = cxio_init_qpid_fifo(rdev_p);
+ if (err)
+ goto qpid_err;
+ err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
+ nr_cqid, 1, 0);
+ if (err)
+ goto cqid_err;
+ err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
+ nr_pdid, 1, 0);
+ if (err)
+ goto pdid_err;
+ return 0;
+pdid_err:
+ kfifo_free(rscp->cqid_fifo);
+cqid_err:
+ kfifo_free(rscp->qpid_fifo);
+qpid_err:
+ kfifo_free(rscp->tpt_fifo);
+tpt_err:
+ return -ENOMEM;
+}
+
+/*
+ * returns 0 if no resource available
+ */
+static inline u32 cxio_hal_get_resource(struct kfifo *fifo)
+{
+ u32 entry;
+ if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
+ return entry;
+ else
+ return 0; /* fifo emptry */
+}
+
+static inline void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+{
+ BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
+}
+
+u32 cxio_hal_get_rhdl(void)
+{
+ return cxio_hal_get_resource(rhdl_fifo);
+}
+
+void cxio_hal_put_rhdl(u32 rhdl)
+{
+ cxio_hal_put_resource(rhdl_fifo, rhdl);
+}
+
+u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->tpt_fifo);
+}
+
+void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
+{
+ cxio_hal_put_resource(rscp->tpt_fifo, stag);
+}
+
+u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
+{
+ u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
+ PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+ return qpid;
+}
+
+void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
+{
+ PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+ cxio_hal_put_resource(rscp->qpid_fifo, qpid);
+}
+
+u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->cqid_fifo);
+}
+
+void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
+{
+ cxio_hal_put_resource(rscp->cqid_fifo, cqid);
+}
+
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->pdid_fifo);
+}
+
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
+{
+ cxio_hal_put_resource(rscp->pdid_fifo, pdid);
+}
+
+void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
+{
+ kfifo_free(rscp->tpt_fifo);
+ kfifo_free(rscp->cqid_fifo);
+ kfifo_free(rscp->qpid_fifo);
+ kfifo_free(rscp->pdid_fifo);
+ kfree(rscp);
+}
+
+/*
+ * PBL Memory Manager. Uses Linux generic allocator.
+ */
+
+#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
+#define PBL_CHUNK 2*1024*1024
+
+u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
+ PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size);
+ return (u32)addr;
+}
+
+void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+ PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size);
+ gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
+}
+
+int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
+{
+ unsigned long i;
+ rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
+ if (rdev_p->pbl_pool)
+ for (i = rdev_p->rnic_info.pbl_base;
+ i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
+ i += PBL_CHUNK)
+ gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
+ return rdev_p->pbl_pool ? 0 : -ENOMEM;
+}
+
+void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
+{
+ gen_pool_destroy(rdev_p->pbl_pool);
+}
+
+/*
+ * RQT Memory Manager. Uses Linux generic allocator.
+ */
+
+#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */
+#define RQT_CHUNK 2*1024*1024
+
+u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
+ PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6);
+ return (u32)addr;
+}
+
+void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+ PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6);
+ gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
+}
+
+int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
+{
+ unsigned long i;
+ rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
+ if (rdev_p->rqt_pool)
+ for (i = rdev_p->rnic_info.rqt_base;
+ i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
+ i += RQT_CHUNK)
+ gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
+ return rdev_p->rqt_pool ? 0 : -ENOMEM;
+}
+
+void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
+{
+ gen_pool_destroy(rdev_p->rqt_pool);
+}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
new file mode 100644
index 000000000000..a2703a3d882d
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_RESOURCE_H__
+#define __CXIO_RESOURCE_H__
+
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/genalloc.h>
+#include "cxio_hal.h"
+
+extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
+extern void cxio_hal_destroy_rhdl_resource(void);
+extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+ u32 nr_tpt, u32 nr_pbl,
+ u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
+ u32 nr_pdid);
+extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
+extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
+extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
+extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
+
+#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
+extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+
+#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
+extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
new file mode 100644
index 000000000000..90d7b8972cb4
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -0,0 +1,684 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_WR_H__
+#define __CXIO_WR_H__
+
+#include <asm/io.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
+#include "firmware_exports.h"
+
+#define T3_MAX_SGE 4
+
+#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
+#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
+ ((rptr)!=(wptr)) )
+#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
+#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
+#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
+#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
+
+static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
+{
+ writel(((1<<31) | qpid), doorbell);
+}
+
+#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
+
+enum t3_wr_flags {
+ T3_COMPLETION_FLAG = 0x01,
+ T3_NOTIFY_FLAG = 0x02,
+ T3_SOLICITED_EVENT_FLAG = 0x04,
+ T3_READ_FENCE_FLAG = 0x08,
+ T3_LOCAL_FENCE_FLAG = 0x10
+} __attribute__ ((packed));
+
+enum t3_wr_opcode {
+ T3_WR_BP = FW_WROPCODE_RI_BYPASS,
+ T3_WR_SEND = FW_WROPCODE_RI_SEND,
+ T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
+ T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
+ T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
+ T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
+ T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
+ T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
+ T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP
+} __attribute__ ((packed));
+
+enum t3_rdma_opcode {
+ T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */
+ T3_READ_REQ,
+ T3_READ_RESP,
+ T3_SEND,
+ T3_SEND_WITH_INV,
+ T3_SEND_WITH_SE,
+ T3_SEND_WITH_SE_INV,
+ T3_TERMINATE,
+ T3_RDMA_INIT, /* CHELSIO RI specific ... */
+ T3_BIND_MW,
+ T3_FAST_REGISTER,
+ T3_LOCAL_INV,
+ T3_QP_MOD,
+ T3_BYPASS
+} __attribute__ ((packed));
+
+static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
+{
+ switch (wrop) {
+ case T3_WR_BP: return T3_BYPASS;
+ case T3_WR_SEND: return T3_SEND;
+ case T3_WR_WRITE: return T3_RDMA_WRITE;
+ case T3_WR_READ: return T3_READ_REQ;
+ case T3_WR_INV_STAG: return T3_LOCAL_INV;
+ case T3_WR_BIND: return T3_BIND_MW;
+ case T3_WR_INIT: return T3_RDMA_INIT;
+ case T3_WR_QP_MOD: return T3_QP_MOD;
+ default: break;
+ }
+ return -1;
+}
+
+
+/* Work request id */
+union t3_wrid {
+ struct {
+ u32 hi;
+ u32 low;
+ } id0;
+ u64 id1;
+};
+
+#define WRID(wrid) (wrid.id1)
+#define WRID_GEN(wrid) (wrid.id0.wr_gen)
+#define WRID_IDX(wrid) (wrid.id0.wr_idx)
+#define WRID_LO(wrid) (wrid.id0.wr_lo)
+
+struct fw_riwrh {
+ __be32 op_seop_flags;
+ __be32 gen_tid_len;
+};
+
+#define S_FW_RIWR_OP 24
+#define M_FW_RIWR_OP 0xff
+#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP)
+#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
+
+#define S_FW_RIWR_SOPEOP 22
+#define M_FW_RIWR_SOPEOP 0x3
+#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP)
+
+#define S_FW_RIWR_FLAGS 8
+#define M_FW_RIWR_FLAGS 0x3fffff
+#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS)
+#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
+
+#define S_FW_RIWR_TID 8
+#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID)
+
+#define S_FW_RIWR_LEN 0
+#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN)
+
+#define S_FW_RIWR_GEN 31
+#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN)
+
+struct t3_sge {
+ __be32 stag;
+ __be32 len;
+ __be64 to;
+};
+
+/* If num_sgle is zero, flit 5+ contains immediate data.*/
+struct t3_send_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 rem_stag;
+ __be32 plen; /* 3 */
+ __be32 num_sgle;
+ struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
+};
+
+struct t3_local_inv_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 stag; /* 2 */
+ __be32 reserved3;
+};
+
+struct t3_rdma_write_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 stag_sink;
+ __be64 to_sink; /* 3 */
+ __be32 plen; /* 4 */
+ __be32 num_sgle;
+ struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */
+};
+
+struct t3_rdma_read_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 rem_stag;
+ __be64 rem_to; /* 3 */
+ __be32 local_stag; /* 4 */
+ __be32 local_len;
+ __be64 local_to; /* 5 */
+};
+
+enum t3_addr_type {
+ T3_VA_BASED_TO = 0x0,
+ T3_ZERO_BASED_TO = 0x1
+} __attribute__ ((packed));
+
+enum t3_mem_perms {
+ T3_MEM_ACCESS_LOCAL_READ = 0x1,
+ T3_MEM_ACCESS_LOCAL_WRITE = 0x2,
+ T3_MEM_ACCESS_REM_READ = 0x4,
+ T3_MEM_ACCESS_REM_WRITE = 0x8
+} __attribute__ ((packed));
+
+struct t3_bind_mw_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u16 reserved; /* 2 */
+ u8 type;
+ u8 perms;
+ __be32 mr_stag;
+ __be32 mw_stag; /* 3 */
+ __be32 mw_len;
+ __be64 mw_va; /* 4 */
+ __be32 mr_pbl_addr; /* 5 */
+ u8 reserved2[3];
+ u8 mr_pagesz;
+};
+
+struct t3_receive_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 pagesz[T3_MAX_SGE];
+ __be32 num_sgle; /* 2 */
+ struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */
+ __be32 pbl_addr[T3_MAX_SGE];
+};
+
+struct t3_bypass_wr {
+ struct fw_riwrh wrh;
+ union t3_wrid wrid; /* 1 */
+};
+
+struct t3_modify_qp_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 flags; /* 2 */
+ __be32 quiesce; /* 2 */
+ __be32 max_ird; /* 3 */
+ __be32 max_ord; /* 3 */
+ __be64 sge_cmd; /* 4 */
+ __be64 ctx1; /* 5 */
+ __be64 ctx0; /* 6 */
+};
+
+enum t3_modify_qp_flags {
+ MODQP_QUIESCE = 0x01,
+ MODQP_MAX_IRD = 0x02,
+ MODQP_MAX_ORD = 0x04,
+ MODQP_WRITE_EC = 0x08,
+ MODQP_READ_EC = 0x10,
+};
+
+
+enum t3_mpa_attrs {
+ uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
+ uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
+ uP_RI_MPA_CRC_ENABLE = 0x4,
+ uP_RI_MPA_IETF_ENABLE = 0x8
+} __attribute__ ((packed));
+
+enum t3_qp_caps {
+ uP_RI_QP_RDMA_READ_ENABLE = 0x01,
+ uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
+ uP_RI_QP_BIND_ENABLE = 0x04,
+ uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
+ uP_RI_QP_STAG0_ENABLE = 0x10
+} __attribute__ ((packed));
+
+struct t3_rdma_init_attr {
+ u32 tid;
+ u32 qpid;
+ u32 pdid;
+ u32 scqid;
+ u32 rcqid;
+ u32 rq_addr;
+ u32 rq_size;
+ enum t3_mpa_attrs mpaattrs;
+ enum t3_qp_caps qpcaps;
+ u16 tcp_emss;
+ u32 ord;
+ u32 ird;
+ u64 qp_dma_addr;
+ u32 qp_dma_size;
+ u32 flags;
+};
+
+struct t3_rdma_init_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 qpid; /* 2 */
+ __be32 pdid;
+ __be32 scqid; /* 3 */
+ __be32 rcqid;
+ __be32 rq_addr; /* 4 */
+ __be32 rq_size;
+ u8 mpaattrs; /* 5 */
+ u8 qpcaps;
+ __be16 ulpdu_size;
+ __be32 flags; /* bits 31-1 - reservered */
+ /* bit 0 - set if RECV posted */
+ __be32 ord; /* 6 */
+ __be32 ird;
+ __be64 qp_dma_addr; /* 7 */
+ __be32 qp_dma_size; /* 8 */
+ u32 rsvd;
+};
+
+struct t3_genbit {
+ u64 flit[15];
+ __be64 genbit;
+};
+
+enum rdma_init_wr_flags {
+ RECVS_POSTED = 1,
+};
+
+union t3_wr {
+ struct t3_send_wr send;
+ struct t3_rdma_write_wr write;
+ struct t3_rdma_read_wr read;
+ struct t3_receive_wr recv;
+ struct t3_local_inv_wr local_inv;
+ struct t3_bind_mw_wr bind;
+ struct t3_bypass_wr bypass;
+ struct t3_rdma_init_wr init;
+ struct t3_modify_qp_wr qp_mod;
+ struct t3_genbit genbit;
+ u64 flit[16];
+};
+
+#define T3_SQ_CQE_FLIT 13
+#define T3_SQ_COOKIE_FLIT 14
+
+#define T3_RQ_COOKIE_FLIT 13
+#define T3_RQ_CQE_FLIT 14
+
+static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
+{
+ return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
+}
+
+static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
+ enum t3_wr_flags flags, u8 genbit, u32 tid,
+ u8 len)
+{
+ wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
+ V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) |
+ V_FW_RIWR_FLAGS(flags));
+ wmb();
+ wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
+ V_FW_RIWR_TID(tid) |
+ V_FW_RIWR_LEN(len));
+ /* 2nd gen bit... */
+ ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
+}
+
+/*
+ * T3 ULP2_TX commands
+ */
+enum t3_utx_mem_op {
+ T3_UTX_MEM_READ = 2,
+ T3_UTX_MEM_WRITE = 3
+};
+
+/* T3 MC7 RDMA TPT entry format */
+
+enum tpt_mem_type {
+ TPT_NON_SHARED_MR = 0x0,
+ TPT_SHARED_MR = 0x1,
+ TPT_MW = 0x2,
+ TPT_MW_RELAXED_PROTECTION = 0x3
+};
+
+enum tpt_addr_type {
+ TPT_ZBTO = 0,
+ TPT_VATO = 1
+};
+
+enum tpt_mem_perm {
+ TPT_LOCAL_READ = 0x8,
+ TPT_LOCAL_WRITE = 0x4,
+ TPT_REMOTE_READ = 0x2,
+ TPT_REMOTE_WRITE = 0x1
+};
+
+struct tpt_entry {
+ __be32 valid_stag_pdid;
+ __be32 flags_pagesize_qpid;
+
+ __be32 rsvd_pbl_addr;
+ __be32 len;
+ __be32 va_hi;
+ __be32 va_low_or_fbo;
+
+ __be32 rsvd_bind_cnt_or_pstag;
+ __be32 rsvd_pbl_size;
+};
+
+#define S_TPT_VALID 31
+#define V_TPT_VALID(x) ((x) << S_TPT_VALID)
+#define F_TPT_VALID V_TPT_VALID(1U)
+
+#define S_TPT_STAG_KEY 23
+#define M_TPT_STAG_KEY 0xFF
+#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY)
+#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
+
+#define S_TPT_STAG_STATE 22
+#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE)
+#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U)
+
+#define S_TPT_STAG_TYPE 20
+#define M_TPT_STAG_TYPE 0x3
+#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE)
+#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
+
+#define S_TPT_PDID 0
+#define M_TPT_PDID 0xFFFFF
+#define V_TPT_PDID(x) ((x) << S_TPT_PDID)
+#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID)
+
+#define S_TPT_PERM 28
+#define M_TPT_PERM 0xF
+#define V_TPT_PERM(x) ((x) << S_TPT_PERM)
+#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM)
+
+#define S_TPT_REM_INV_DIS 27
+#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS)
+#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U)
+
+#define S_TPT_ADDR_TYPE 26
+#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE)
+#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U)
+
+#define S_TPT_MW_BIND_ENABLE 25
+#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE)
+#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U)
+
+#define S_TPT_PAGE_SIZE 20
+#define M_TPT_PAGE_SIZE 0x1F
+#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE)
+#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
+
+#define S_TPT_PBL_ADDR 0
+#define M_TPT_PBL_ADDR 0x1FFFFFFF
+#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR)
+#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
+
+#define S_TPT_QPID 0
+#define M_TPT_QPID 0xFFFFF
+#define V_TPT_QPID(x) ((x) << S_TPT_QPID)
+#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID)
+
+#define S_TPT_PSTAG 0
+#define M_TPT_PSTAG 0xFFFFFF
+#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG)
+#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
+
+#define S_TPT_PBL_SIZE 0
+#define M_TPT_PBL_SIZE 0xFFFFF
+#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE)
+#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
+
+/*
+ * CQE defs
+ */
+struct t3_cqe {
+ __be32 header;
+ __be32 len;
+ union {
+ struct {
+ __be32 stag;
+ __be32 msn;
+ } rcqe;
+ struct {
+ u32 wrid_hi;
+ u32 wrid_low;
+ } scqe;
+ } u;
+};
+
+#define S_CQE_OOO 31
+#define M_CQE_OOO 0x1
+#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
+#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
+
+#define S_CQE_QPID 12
+#define M_CQE_QPID 0x7FFFF
+#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
+#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
+
+#define S_CQE_SWCQE 11
+#define M_CQE_SWCQE 0x1
+#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
+#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
+
+#define S_CQE_GENBIT 10
+#define M_CQE_GENBIT 0x1
+#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
+#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
+
+#define S_CQE_STATUS 5
+#define M_CQE_STATUS 0x1F
+#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
+#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
+
+#define S_CQE_TYPE 4
+#define M_CQE_TYPE 0x1
+#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
+#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
+
+#define S_CQE_OPCODE 0
+#define M_CQE_OPCODE 0xF
+#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
+#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
+
+#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header)))
+#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header)))
+#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header)))
+#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header)))
+#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header)))
+#define SQ_TYPE(x) (CQE_TYPE((x)))
+#define RQ_TYPE(x) (!CQE_TYPE((x)))
+#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header)))
+#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header)))
+
+#define CQE_LEN(x) (be32_to_cpu((x).len))
+
+/* used for RQ completion processing */
+#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag))
+#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn))
+
+/* used for SQ completion processing */
+#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
+
+/* generic accessor macros */
+#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
+
+#define TPT_ERR_SUCCESS 0x0
+#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
+ /* STAG is offlimt, being 0, */
+ /* or STAG_key mismatch */
+#define TPT_ERR_PDID 0x2 /* PDID mismatch */
+#define TPT_ERR_QPID 0x3 /* QPID mismatch */
+#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
+#define TPT_ERR_WRAP 0x5 /* Wrap error */
+#define TPT_ERR_BOUND 0x6 /* base and bounds voilation */
+#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
+ /* shared memory region */
+#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
+ /* shared memory region */
+#define TPT_ERR_ECC 0x9 /* ECC error detected */
+#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
+ /* reading PSTAG for a MW */
+ /* Invalidate */
+#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
+ /* software error */
+#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
+#define TPT_ERR_CRC 0x10 /* CRC error */
+#define TPT_ERR_MARKER 0x11 /* Marker error */
+#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
+#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
+#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
+#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
+#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
+#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
+#define TPT_ERR_MSN 0x18 /* MSN error */
+#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
+#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
+ /* or READ_REQ */
+#define TPT_ERR_MSN_GAP 0x1B
+#define TPT_ERR_MSN_RANGE 0x1C
+#define TPT_ERR_IRD_OVERFLOW 0x1D
+#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
+ /* software error */
+#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
+ /* mismatch) */
+
+struct t3_swsq {
+ __u64 wr_id;
+ struct t3_cqe cqe;
+ __u32 sq_wptr;
+ __be32 read_len;
+ int opcode;
+ int complete;
+ int signaled;
+};
+
+/*
+ * A T3 WQ implements both the SQ and RQ.
+ */
+struct t3_wq {
+ union t3_wr *queue; /* DMA accessable memory */
+ dma_addr_t dma_addr; /* DMA address for HW */
+ DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */
+ u32 error; /* 1 once we go to ERROR */
+ u32 qpid;
+ u32 wptr; /* idx to next available WR slot */
+ u32 size_log2; /* total wq size */
+ struct t3_swsq *sq; /* SW SQ */
+ struct t3_swsq *oldest_read; /* tracks oldest pending read */
+ u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
+ u32 sq_rptr; /* pending wrs */
+ u32 sq_size_log2; /* sq size */
+ u64 *rq; /* SW RQ (holds consumer wr_ids */
+ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
+ u32 rq_rptr; /* pending wrs */
+ u64 *rq_oldest_wr; /* oldest wr on the SW RQ */
+ u32 rq_size_log2; /* rq size */
+ u32 rq_addr; /* rq adapter address */
+ void __iomem *doorbell; /* kernel db */
+ u64 udb; /* user db if any */
+};
+
+struct t3_cq {
+ u32 cqid;
+ u32 rptr;
+ u32 wptr;
+ u32 size_log2;
+ dma_addr_t dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+ struct t3_cqe *queue;
+ struct t3_cqe *sw_queue;
+ u32 sw_rptr;
+ u32 sw_wptr;
+};
+
+#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
+ CQE_GENBIT(*cqe))
+
+static inline void cxio_set_wq_in_error(struct t3_wq *wq)
+{
+ wq->queue->flit[13] = 1;
+}
+
+static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
new file mode 100644
index 000000000000..0315c9d9fce9
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "cxgb3_offload.h"
+#include "iwch_provider.h"
+#include "iwch_user.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+
+#define DRV_VERSION "1.1"
+
+MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
+MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+
+static void open_rnic_dev(struct t3cdev *);
+static void close_rnic_dev(struct t3cdev *);
+
+struct cxgb3_client t3c_client = {
+ .name = "iw_cxgb3",
+ .add = open_rnic_dev,
+ .remove = close_rnic_dev,
+ .handlers = t3c_handlers,
+ .redirect = iwch_ep_redirect
+};
+
+static LIST_HEAD(dev_list);
+static DEFINE_MUTEX(dev_mutex);
+
+static void rnic_init(struct iwch_dev *rnicp)
+{
+ PDBG("%s iwch_dev %p\n", __FUNCTION__, rnicp);
+ idr_init(&rnicp->cqidr);
+ idr_init(&rnicp->qpidr);
+ idr_init(&rnicp->mmidr);
+ spin_lock_init(&rnicp->lock);
+
+ rnicp->attr.vendor_id = 0x168;
+ rnicp->attr.vendor_part_id = 7;
+ rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
+ rnicp->attr.max_wrs = (1UL << 24) - 1;
+ rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
+ rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
+ rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
+ rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
+ rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
+ rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
+ rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
+ rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
+ rnicp->attr.can_resize_wq = 0;
+ rnicp->attr.max_rdma_reads_per_qp = 8;
+ rnicp->attr.max_rdma_read_resources =
+ rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
+ rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */
+ rnicp->attr.max_rdma_read_depth =
+ rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
+ rnicp->attr.rq_overflow_handled = 0;
+ rnicp->attr.can_modify_ird = 0;
+ rnicp->attr.can_modify_ord = 0;
+ rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
+ rnicp->attr.stag0_value = 1;
+ rnicp->attr.zbva_support = 1;
+ rnicp->attr.local_invalidate_fence = 1;
+ rnicp->attr.cq_overflow_detection = 1;
+ return;
+}
+
+static void open_rnic_dev(struct t3cdev *tdev)
+{
+ struct iwch_dev *rnicp;
+ static int vers_printed;
+
+ PDBG("%s t3cdev %p\n", __FUNCTION__, tdev);
+ if (!vers_printed++)
+ printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
+ DRV_VERSION);
+ rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
+ if (!rnicp) {
+ printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ return;
+ }
+ rnicp->rdev.ulp = rnicp;
+ rnicp->rdev.t3cdev_p = tdev;
+
+ mutex_lock(&dev_mutex);
+
+ if (cxio_rdev_open(&rnicp->rdev)) {
+ mutex_unlock(&dev_mutex);
+ printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
+ ib_dealloc_device(&rnicp->ibdev);
+ return;
+ }
+
+ rnic_init(rnicp);
+
+ list_add_tail(&rnicp->entry, &dev_list);
+ mutex_unlock(&dev_mutex);
+
+ if (iwch_register_device(rnicp)) {
+ printk(KERN_ERR MOD "Unable to register device\n");
+ close_rnic_dev(tdev);
+ }
+ printk(KERN_INFO MOD "Initialized device %s\n",
+ pci_name(rnicp->rdev.rnic_info.pdev));
+ return;
+}
+
+static void close_rnic_dev(struct t3cdev *tdev)
+{
+ struct iwch_dev *dev, *tmp;
+ PDBG("%s t3cdev %p\n", __FUNCTION__, tdev);
+ mutex_lock(&dev_mutex);
+ list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
+ if (dev->rdev.t3cdev_p == tdev) {
+ list_del(&dev->entry);
+ iwch_unregister_device(dev);
+ cxio_rdev_close(&dev->rdev);
+ idr_destroy(&dev->cqidr);
+ idr_destroy(&dev->qpidr);
+ idr_destroy(&dev->mmidr);
+ ib_dealloc_device(&dev->ibdev);
+ break;
+ }
+ }
+ mutex_unlock(&dev_mutex);
+}
+
+static int __init iwch_init_module(void)
+{
+ int err;
+
+ err = cxio_hal_init();
+ if (err)
+ return err;
+ err = iwch_cm_init();
+ if (err)
+ return err;
+ cxio_register_ev_cb(iwch_ev_dispatch);
+ cxgb3_register_client(&t3c_client);
+ return 0;
+}
+
+static void __exit iwch_exit_module(void)
+{
+ cxgb3_unregister_client(&t3c_client);
+ cxio_unregister_ev_cb(iwch_ev_dispatch);
+ iwch_cm_term();
+ cxio_hal_exit();
+}
+
+module_init(iwch_init_module);
+module_exit(iwch_exit_module);
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
new file mode 100644
index 000000000000..caf4e6007a44
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_H__
+#define __IWCH_H__
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "cxio_hal.h"
+#include "cxgb3_offload.h"
+
+struct iwch_pd;
+struct iwch_cq;
+struct iwch_qp;
+struct iwch_mr;
+
+struct iwch_rnic_attributes {
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 max_qps;
+ u32 max_wrs; /* Max for any SQ/RQ */
+ u32 max_sge_per_wr;
+ u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */
+ u32 max_cqs;
+ u32 max_cqes_per_cq;
+ u32 max_mem_regs;
+ u32 max_phys_buf_entries; /* for phys buf list */
+ u32 max_pds;
+
+ /*
+ * The memory page sizes supported by this RNIC.
+ * Bit position i in bitmap indicates page of
+ * size (4k)^i. Phys block list mode unsupported.
+ */
+ u32 mem_pgsizes_bitmask;
+ u8 can_resize_wq;
+
+ /*
+ * The maximum number of RDMA Reads that can be outstanding
+ * per QP with this RNIC as the target.
+ */
+ u32 max_rdma_reads_per_qp;
+
+ /*
+ * The maximum number of resources used for RDMA Reads
+ * by this RNIC with this RNIC as the target.
+ */
+ u32 max_rdma_read_resources;
+
+ /*
+ * The max depth per QP for initiation of RDMA Read
+ * by this RNIC.
+ */
+ u32 max_rdma_read_qp_depth;
+
+ /*
+ * The maximum depth for initiation of RDMA Read
+ * operations by this RNIC on all QPs
+ */
+ u32 max_rdma_read_depth;
+ u8 rq_overflow_handled;
+ u32 can_modify_ird;
+ u32 can_modify_ord;
+ u32 max_mem_windows;
+ u32 stag0_value;
+ u8 zbva_support;
+ u8 local_invalidate_fence;
+ u32 cq_overflow_detection;
+};
+
+struct iwch_dev {
+ struct ib_device ibdev;
+ struct cxio_rdev rdev;
+ u32 device_cap_flags;
+ struct iwch_rnic_attributes attr;
+ struct idr cqidr;
+ struct idr qpidr;
+ struct idr mmidr;
+ spinlock_t lock;
+ struct list_head entry;
+};
+
+static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct iwch_dev, ibdev);
+}
+
+static inline int t3b_device(const struct iwch_dev *rhp)
+{
+ return rhp->rdev.t3cdev_p->type == T3B;
+}
+
+static inline int t3a_device(const struct iwch_dev *rhp)
+{
+ return rhp->rdev.t3cdev_p->type == T3A;
+}
+
+static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
+{
+ return idr_find(&rhp->cqidr, cqid);
+}
+
+static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
+{
+ return idr_find(&rhp->qpidr, qpid);
+}
+
+static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
+{
+ return idr_find(&rhp->mmidr, mmid);
+}
+
+static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ int ret;
+ u32 newid;
+
+ do {
+ if (!idr_pre_get(idr, GFP_KERNEL)) {
+ return -ENOMEM;
+ }
+ spin_lock_irq(&rhp->lock);
+ ret = idr_get_new_above(idr, handle, id, &newid);
+ BUG_ON(newid != id);
+ spin_unlock_irq(&rhp->lock);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
+{
+ spin_lock_irq(&rhp->lock);
+ idr_remove(idr, id);
+ spin_unlock_irq(&rhp->lock);
+}
+
+extern struct cxgb3_client t3c_client;
+extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
new file mode 100644
index 000000000000..e5442e34b788
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -0,0 +1,2080 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+
+#include <net/neighbour.h>
+#include <net/netevent.h>
+#include <net/route.h>
+
+#include "tcb.h"
+#include "cxgb3_offload.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+#include "iwch_cm.h"
+
+static char *states[] = {
+ "idle",
+ "listen",
+ "connecting",
+ "mpa_wait_req",
+ "mpa_req_sent",
+ "mpa_req_rcvd",
+ "mpa_rep_sent",
+ "fpdu_mode",
+ "aborting",
+ "closing",
+ "moribund",
+ "dead",
+ NULL,
+};
+
+static int ep_timeout_secs = 10;
+module_param(ep_timeout_secs, int, 0444);
+MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
+ "in seconds (default=10)");
+
+static int mpa_rev = 1;
+module_param(mpa_rev, int, 0444);
+MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
+ "1 is spec compliant. (default=1)");
+
+static int markers_enabled = 0;
+module_param(markers_enabled, int, 0444);
+MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
+
+static int crc_enabled = 1;
+module_param(crc_enabled, int, 0444);
+MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
+
+static int rcv_win = 256 * 1024;
+module_param(rcv_win, int, 0444);
+MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)");
+
+static int snd_win = 32 * 1024;
+module_param(snd_win, int, 0444);
+MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
+
+static unsigned int nocong = 0;
+module_param(nocong, uint, 0444);
+MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
+
+static unsigned int cong_flavor = 1;
+module_param(cong_flavor, uint, 0444);
+MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
+
+static void process_work(struct work_struct *work);
+static struct workqueue_struct *workq;
+static DECLARE_WORK(skb_work, process_work);
+
+static struct sk_buff_head rxq;
+static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS];
+
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
+static void ep_timeout(unsigned long arg);
+static void connect_reply_upcall(struct iwch_ep *ep, int status);
+
+static void start_ep_timer(struct iwch_ep *ep)
+{
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ if (timer_pending(&ep->timer)) {
+ PDBG("%s stopped / restarted timer ep %p\n", __FUNCTION__, ep);
+ del_timer_sync(&ep->timer);
+ } else
+ get_ep(&ep->com);
+ ep->timer.expires = jiffies + ep_timeout_secs * HZ;
+ ep->timer.data = (unsigned long)ep;
+ ep->timer.function = ep_timeout;
+ add_timer(&ep->timer);
+}
+
+static void stop_ep_timer(struct iwch_ep *ep)
+{
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ del_timer_sync(&ep->timer);
+ put_ep(&ep->com);
+}
+
+static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
+{
+ struct cpl_tid_release *req;
+
+ skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ if (!skb)
+ return;
+ req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
+ skb->priority = CPL_PRIORITY_SETUP;
+ tdev->send(tdev, skb);
+ return;
+}
+
+int iwch_quiesce_tid(struct iwch_ep *ep)
+{
+ struct cpl_set_tcb_field *req;
+ struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+
+ if (!skb)
+ return -ENOMEM;
+ req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+ req->reply = 0;
+ req->cpu_idx = 0;
+ req->word = htons(W_TCB_RX_QUIESCE);
+ req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+ req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
+
+ skb->priority = CPL_PRIORITY_DATA;
+ ep->com.tdev->send(ep->com.tdev, skb);
+ return 0;
+}
+
+int iwch_resume_tid(struct iwch_ep *ep)
+{
+ struct cpl_set_tcb_field *req;
+ struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+
+ if (!skb)
+ return -ENOMEM;
+ req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+ req->reply = 0;
+ req->cpu_idx = 0;
+ req->word = htons(W_TCB_RX_QUIESCE);
+ req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+ req->val = 0;
+
+ skb->priority = CPL_PRIORITY_DATA;
+ ep->com.tdev->send(ep->com.tdev, skb);
+ return 0;
+}
+
+static void set_emss(struct iwch_ep *ep, u16 opt)
+{
+ PDBG("%s ep %p opt %u\n", __FUNCTION__, ep, opt);
+ ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
+ if (G_TCPOPT_TSTAMP(opt))
+ ep->emss -= 12;
+ if (ep->emss < 128)
+ ep->emss = 128;
+ PDBG("emss=%d\n", ep->emss);
+}
+
+static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
+{
+ unsigned long flags;
+ enum iwch_ep_state state;
+
+ spin_lock_irqsave(&epc->lock, flags);
+ state = epc->state;
+ spin_unlock_irqrestore(&epc->lock, flags);
+ return state;
+}
+
+static inline void __state_set(struct iwch_ep_common *epc,
+ enum iwch_ep_state new)
+{
+ epc->state = new;
+}
+
+static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&epc->lock, flags);
+ PDBG("%s - %s -> %s\n", __FUNCTION__, states[epc->state], states[new]);
+ __state_set(epc, new);
+ spin_unlock_irqrestore(&epc->lock, flags);
+ return;
+}
+
+static void *alloc_ep(int size, gfp_t gfp)
+{
+ struct iwch_ep_common *epc;
+
+ epc = kmalloc(size, gfp);
+ if (epc) {
+ memset(epc, 0, size);
+ kref_init(&epc->kref);
+ spin_lock_init(&epc->lock);
+ init_waitqueue_head(&epc->waitq);
+ }
+ PDBG("%s alloc ep %p\n", __FUNCTION__, epc);
+ return epc;
+}
+
+void __free_ep(struct kref *kref)
+{
+ struct iwch_ep_common *epc;
+ epc = container_of(kref, struct iwch_ep_common, kref);
+ PDBG("%s ep %p state %s\n", __FUNCTION__, epc, states[state_read(epc)]);
+ kfree(epc);
+}
+
+static void release_ep_resources(struct iwch_ep *ep)
+{
+ PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+ cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
+ dst_release(ep->dst);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ if (ep->com.tdev->type == T3B)
+ release_tid(ep->com.tdev, ep->hwtid, NULL);
+ put_ep(&ep->com);
+}
+
+static void process_work(struct work_struct *work)
+{
+ struct sk_buff *skb = NULL;
+ void *ep;
+ struct t3cdev *tdev;
+ int ret;
+
+ while ((skb = skb_dequeue(&rxq))) {
+ ep = *((void **) (skb->cb));
+ tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
+ ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
+ if (ret & CPL_RET_BUF_DONE)
+ kfree_skb(skb);
+
+ /*
+ * ep was referenced in sched(), and is freed here.
+ */
+ put_ep((struct iwch_ep_common *)ep);
+ }
+}
+
+static int status2errno(int status)
+{
+ switch (status) {
+ case CPL_ERR_NONE:
+ return 0;
+ case CPL_ERR_CONN_RESET:
+ return -ECONNRESET;
+ case CPL_ERR_ARP_MISS:
+ return -EHOSTUNREACH;
+ case CPL_ERR_CONN_TIMEDOUT:
+ return -ETIMEDOUT;
+ case CPL_ERR_TCAM_FULL:
+ return -ENOMEM;
+ case CPL_ERR_CONN_EXIST:
+ return -EADDRINUSE;
+ default:
+ return -EIO;
+ }
+}
+
+/*
+ * Try and reuse skbs already allocated...
+ */
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
+{
+ if (skb) {
+ BUG_ON(skb_cloned(skb));
+ skb_trim(skb, 0);
+ skb_get(skb);
+ } else {
+ skb = alloc_skb(len, gfp);
+ }
+ return skb;
+}
+
+static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
+ __be32 peer_ip, __be16 local_port,
+ __be16 peer_port, u8 tos)
+{
+ struct rtable *rt;
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = peer_ip,
+ .saddr = local_ip,
+ .tos = tos}
+ },
+ .proto = IPPROTO_TCP,
+ .uli_u = {
+ .ports = {
+ .sport = local_port,
+ .dport = peer_port}
+ }
+ };
+
+ if (ip_route_output_flow(&rt, &fl, NULL, 0))
+ return NULL;
+ return rt;
+}
+
+static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
+{
+ int i = 0;
+
+ while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
+ ++i;
+ return i;
+}
+
+static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
+{
+ PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
+ kfree_skb(skb);
+}
+
+/*
+ * Handle an ARP failure for an active open.
+ */
+static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
+{
+ printk(KERN_ERR MOD "ARP failure duing connect\n");
+ kfree_skb(skb);
+}
+
+/*
+ * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
+ * and send it along.
+ */
+static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
+{
+ struct cpl_abort_req *req = cplhdr(skb);
+
+ PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
+ req->cmd = CPL_ABORT_NO_RST;
+ cxgb3_ofld_send(dev, skb);
+}
+
+static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
+{
+ struct cpl_close_con_req *req;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ skb = get_skb(NULL, sizeof(*req), gfp);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ skb->priority = CPL_PRIORITY_DATA;
+ set_arp_failure_handler(skb, arp_failure_discard);
+ req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+ struct cpl_abort_req *req;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ skb = get_skb(skb, sizeof(*req), gfp);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+ __FUNCTION__);
+ return -ENOMEM;
+ }
+ skb->priority = CPL_PRIORITY_DATA;
+ set_arp_failure_handler(skb, abort_arp_failure);
+ req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
+ req->cmd = CPL_ABORT_SEND_RST;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int send_connect(struct iwch_ep *ep)
+{
+ struct cpl_act_open_req *req;
+ struct sk_buff *skb;
+ u32 opt0h, opt0l, opt2;
+ unsigned int mtu_idx;
+ int wscale;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+ __FUNCTION__);
+ return -ENOMEM;
+ }
+ mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
+ wscale = compute_wscale(rcv_win);
+ opt0h = V_NAGLE(0) |
+ V_NO_CONG(nocong) |
+ V_KEEP_ALIVE(1) |
+ F_TCAM_BYPASS |
+ V_WND_SCALE(wscale) |
+ V_MSS_IDX(mtu_idx) |
+ V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
+ opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
+ opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+ skb->priority = CPL_PRIORITY_SETUP;
+ set_arp_failure_handler(skb, act_open_req_arp_failure);
+
+ req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
+ req->local_port = ep->com.local_addr.sin_port;
+ req->peer_port = ep->com.remote_addr.sin_port;
+ req->local_ip = ep->com.local_addr.sin_addr.s_addr;
+ req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
+ req->opt0h = htonl(opt0h);
+ req->opt0l = htonl(opt0l);
+ req->params = 0;
+ req->opt2 = htonl(opt2);
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
+{
+ int mpalen;
+ struct tx_data_wr *req;
+ struct mpa_message *mpa;
+ int len;
+
+ PDBG("%s ep %p pd_len %d\n", __FUNCTION__, ep, ep->plen);
+
+ BUG_ON(skb_cloned(skb));
+
+ mpalen = sizeof(*mpa) + ep->plen;
+ if (skb->data + mpalen + sizeof(*req) > skb->end) {
+ kfree_skb(skb);
+ skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ connect_reply_upcall(ep, -ENOMEM);
+ return;
+ }
+ }
+ skb_trim(skb, 0);
+ skb_reserve(skb, sizeof(*req));
+ skb_put(skb, mpalen);
+ skb->priority = CPL_PRIORITY_DATA;
+ mpa = (struct mpa_message *) skb->data;
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+ mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0);
+ mpa->private_data_size = htons(ep->plen);
+ mpa->revision = mpa_rev;
+
+ if (ep->plen)
+ memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+
+ /*
+ * Reference the mpa skb. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function tx_ack() will deref it.
+ */
+ skb_get(skb);
+ set_arp_failure_handler(skb, arp_failure_discard);
+ skb->h.raw = skb->data;
+ len = skb->len;
+ req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+ req->len = htonl(len);
+ req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+ V_TX_SNDBUF(snd_win>>15));
+ req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+ req->sndseq = htonl(ep->snd_seq);
+ BUG_ON(ep->mpa_skb);
+ ep->mpa_skb = skb;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ start_ep_timer(ep);
+ state_set(&ep->com, MPA_REQ_SENT);
+ return;
+}
+
+static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen;
+ struct tx_data_wr *req;
+ struct mpa_message *mpa;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
+
+ mpalen = sizeof(*mpa) + plen;
+
+ skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ skb_reserve(skb, sizeof(*req));
+ mpa = (struct mpa_message *) skb_put(skb, mpalen);
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = MPA_REJECT;
+ mpa->revision = mpa_rev;
+ mpa->private_data_size = htons(plen);
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+
+ /*
+ * Reference the mpa skb again. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function tx_ack() will deref it.
+ */
+ skb_get(skb);
+ skb->priority = CPL_PRIORITY_DATA;
+ set_arp_failure_handler(skb, arp_failure_discard);
+ skb->h.raw = skb->data;
+ req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+ req->len = htonl(mpalen);
+ req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+ V_TX_SNDBUF(snd_win>>15));
+ req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+ req->sndseq = htonl(ep->snd_seq);
+ BUG_ON(ep->mpa_skb);
+ ep->mpa_skb = skb;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen;
+ struct tx_data_wr *req;
+ struct mpa_message *mpa;
+ int len;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
+
+ mpalen = sizeof(*mpa) + plen;
+
+ skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ skb->priority = CPL_PRIORITY_DATA;
+ skb_reserve(skb, sizeof(*req));
+ mpa = (struct mpa_message *) skb_put(skb, mpalen);
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0);
+ mpa->revision = mpa_rev;
+ mpa->private_data_size = htons(plen);
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+
+ /*
+ * Reference the mpa skb. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function tx_ack() will deref it.
+ */
+ skb_get(skb);
+ set_arp_failure_handler(skb, arp_failure_discard);
+ skb->h.raw = skb->data;
+ len = skb->len;
+ req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+ req->len = htonl(len);
+ req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+ V_TX_SNDBUF(snd_win>>15));
+ req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT);
+ req->sndseq = htonl(ep->snd_seq);
+ ep->mpa_skb = skb;
+ state_set(&ep->com, MPA_REP_SENT);
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_act_establish *req = cplhdr(skb);
+ unsigned int tid = GET_TID(req);
+
+ PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, tid);
+
+ dst_confirm(ep->dst);
+
+ /* setup the hwtid for this connection */
+ ep->hwtid = tid;
+ cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
+
+ ep->snd_seq = ntohl(req->snd_isn);
+
+ set_emss(ep, ntohs(req->tcp_opt));
+
+ /* dealloc the atid */
+ cxgb3_free_atid(ep->com.tdev, ep->atid);
+
+ /* start MPA negotiation */
+ send_mpa_req(ep, skb);
+
+ return 0;
+}
+
+static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+ PDBG("%s ep %p\n", __FILE__, ep);
+ state_set(&ep->com, ABORTING);
+ send_abort(ep, skb, gfp);
+}
+
+static void close_complete_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ if (ep->com.cm_id) {
+ PDBG("close complete delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void peer_close_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_DISCONNECT;
+ if (ep->com.cm_id) {
+ PDBG("peer close delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+}
+
+static void peer_abort_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ event.status = -ECONNRESET;
+ if (ep->com.cm_id) {
+ PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
+ ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void connect_reply_upcall(struct iwch_ep *ep, int status)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p status %d\n", __FUNCTION__, ep, status);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REPLY;
+ event.status = status;
+ event.local_addr = ep->com.local_addr;
+ event.remote_addr = ep->com.remote_addr;
+
+ if ((status == 0) || (status == -ECONNREFUSED)) {
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ }
+ if (ep->com.cm_id) {
+ PDBG("%s ep %p tid %d status %d\n", __FUNCTION__, ep,
+ ep->hwtid, status);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+ if (status < 0) {
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void connect_request_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ event.local_addr = ep->com.local_addr;
+ event.remote_addr = ep->com.remote_addr;
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ event.provider_data = ep;
+ if (state_read(&ep->parent_ep->com) != DEAD)
+ ep->parent_ep->com.cm_id->event_handler(
+ ep->parent_ep->com.cm_id,
+ &event);
+ put_ep(&ep->parent_ep->com);
+ ep->parent_ep = NULL;
+}
+
+static void established_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_ESTABLISHED;
+ if (ep->com.cm_id) {
+ PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+}
+
+static int update_rx_credits(struct iwch_ep *ep, u32 credits)
+{
+ struct cpl_rx_data_ack *req;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ return 0;
+ }
+
+ req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
+ req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
+ skb->priority = CPL_PRIORITY_ACK;
+ ep->com.tdev->send(ep->com.tdev, skb);
+ return credits;
+}
+
+static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
+{
+ struct mpa_message *mpa;
+ u16 plen;
+ struct iwch_qp_attributes attrs;
+ enum iwch_qp_attr_mask mask;
+ int err;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+ /*
+ * Stop mpa timer. If it expired, then the state has
+ * changed and we bail since ep_timeout already aborted
+ * the connection.
+ */
+ stop_ep_timer(ep);
+ if (state_read(&ep->com) != MPA_REQ_SENT)
+ return;
+
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * copy the new data into our accumulation buffer.
+ */
+ memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+ ep->mpa_pkt_len += skb->len;
+
+ /*
+ * if we don't even have the mpa message, then bail.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return;
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /* Validate MPA header. */
+ if (mpa->revision != mpa_rev) {
+ err = -EPROTO;
+ goto err;
+ }
+ if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ /*
+ * If plen does not account for pkt size
+ */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ * We'll continue process when more data arrives.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return;
+
+ if (mpa->flags & MPA_REJECT) {
+ err = -ECONNREFUSED;
+ goto err;
+ }
+
+ /*
+ * If we get here we have accumulated the entire mpa
+ * start reply message including private data. And
+ * the MPA header is valid.
+ */
+ state_set(&ep->com, FPDU_MODE);
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa_rev;
+ PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = IWCH_QP_STATE_RTS;
+
+ mask = IWCH_QP_ATTR_NEXT_STATE |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
+
+ /* bind QP and TID with INIT_WR */
+ err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+ if (!err)
+ goto out;
+err:
+ abort_connection(ep, skb, GFP_KERNEL);
+out:
+ connect_reply_upcall(ep, err);
+ return;
+}
+
+static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
+{
+ struct mpa_message *mpa;
+ u16 plen;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+ /*
+ * Stop mpa timer. If it expired, then the state has
+ * changed and we bail since ep_timeout already aborted
+ * the connection.
+ */
+ stop_ep_timer(ep);
+ if (state_read(&ep->com) != MPA_REQ_WAIT)
+ return;
+
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
+
+ /*
+ * Copy the new data into our accumulation buffer.
+ */
+ memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+ ep->mpa_pkt_len += skb->len;
+
+ /*
+ * If we don't even have the mpa message, then bail.
+ * We'll continue process when more data arrives.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return;
+ PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /*
+ * Validate MPA Header.
+ */
+ if (mpa->revision != mpa_rev) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ /*
+ * If plen does not account for pkt size
+ */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return;
+
+ /*
+ * If we get here we have accumulated the entire mpa
+ * start reply message including private data.
+ */
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa_rev;
+ PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+ state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ connect_request_upcall(ep);
+ return;
+}
+
+static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_rx_data *hdr = cplhdr(skb);
+ unsigned int dlen = ntohs(hdr->len);
+
+ PDBG("%s ep %p dlen %u\n", __FUNCTION__, ep, dlen);
+
+ skb_pull(skb, sizeof(*hdr));
+ skb_trim(skb, dlen);
+
+ switch (state_read(&ep->com)) {
+ case MPA_REQ_SENT:
+ process_mpa_reply(ep, skb);
+ break;
+ case MPA_REQ_WAIT:
+ process_mpa_request(ep, skb);
+ break;
+ case MPA_REP_SENT:
+ break;
+ default:
+ printk(KERN_ERR MOD "%s Unexpected streaming data."
+ " ep %p state %d tid %d\n",
+ __FUNCTION__, ep, state_read(&ep->com), ep->hwtid);
+
+ /*
+ * The ep will timeout and inform the ULP of the failure.
+ * See ep_timeout().
+ */
+ break;
+ }
+
+ /* update RX credits */
+ update_rx_credits(ep, dlen);
+
+ return CPL_RET_BUF_DONE;
+}
+
+/*
+ * Upcall from the adapter indicating data has been transmitted.
+ * For us its just the single MPA request or reply. We can now free
+ * the skb holding the mpa message.
+ */
+static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_wr_ack *hdr = cplhdr(skb);
+ unsigned int credits = ntohs(hdr->credits);
+ enum iwch_qp_attr_mask mask;
+
+ PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
+
+ if (credits == 0)
+ return CPL_RET_BUF_DONE;
+ BUG_ON(credits != 1);
+ BUG_ON(ep->mpa_skb == NULL);
+ kfree_skb(ep->mpa_skb);
+ ep->mpa_skb = NULL;
+ dst_confirm(ep->dst);
+ if (state_read(&ep->com) == MPA_REP_SENT) {
+ struct iwch_qp_attributes attrs;
+
+ /* bind QP to EP and move to RTS */
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ord;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = IWCH_QP_STATE_RTS;
+
+ /* bind QP and TID with INIT_WR */
+ mask = IWCH_QP_ATTR_NEXT_STATE |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_MAX_IRD |
+ IWCH_QP_ATTR_MAX_ORD;
+
+ ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+
+ if (!ep->com.rpl_err) {
+ state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ }
+
+ ep->com.rpl_done = 1;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
+ }
+ return CPL_RET_BUF_DONE;
+}
+
+static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+ close_complete_upcall(ep);
+ state_set(&ep->com, DEAD);
+ release_ep_resources(ep);
+ return CPL_RET_BUF_DONE;
+}
+
+static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_act_open_rpl *rpl = cplhdr(skb);
+
+ PDBG("%s ep %p status %u errno %d\n", __FUNCTION__, ep, rpl->status,
+ status2errno(rpl->status));
+ connect_reply_upcall(ep, status2errno(rpl->status));
+ state_set(&ep->com, DEAD);
+ if (ep->com.tdev->type == T3B)
+ release_tid(ep->com.tdev, GET_TID(rpl), NULL);
+ cxgb3_free_atid(ep->com.tdev, ep->atid);
+ dst_release(ep->dst);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ put_ep(&ep->com);
+ return CPL_RET_BUF_DONE;
+}
+
+static int listen_start(struct iwch_listen_ep *ep)
+{
+ struct sk_buff *skb;
+ struct cpl_pass_open_req *req;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
+ return -ENOMEM;
+ }
+
+ req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
+ req->local_port = ep->com.local_addr.sin_port;
+ req->local_ip = ep->com.local_addr.sin_addr.s_addr;
+ req->peer_port = 0;
+ req->peer_ip = 0;
+ req->peer_netmask = 0;
+ req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
+ req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
+ req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
+
+ skb->priority = 1;
+ ep->com.tdev->send(ep->com.tdev, skb);
+ return 0;
+}
+
+static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_listen_ep *ep = ctx;
+ struct cpl_pass_open_rpl *rpl = cplhdr(skb);
+
+ PDBG("%s ep %p status %d error %d\n", __FUNCTION__, ep,
+ rpl->status, status2errno(rpl->status));
+ ep->com.rpl_err = status2errno(rpl->status);
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+
+ return CPL_RET_BUF_DONE;
+}
+
+static int listen_stop(struct iwch_listen_ep *ep)
+{
+ struct sk_buff *skb;
+ struct cpl_close_listserv_req *req;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
+ skb->priority = 1;
+ ep->com.tdev->send(ep->com.tdev, skb);
+ return 0;
+}
+
+static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
+ void *ctx)
+{
+ struct iwch_listen_ep *ep = ctx;
+ struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ ep->com.rpl_err = status2errno(rpl->status);
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+ return CPL_RET_BUF_DONE;
+}
+
+static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
+{
+ struct cpl_pass_accept_rpl *rpl;
+ unsigned int mtu_idx;
+ u32 opt0h, opt0l, opt2;
+ int wscale;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ BUG_ON(skb_cloned(skb));
+ skb_trim(skb, sizeof(*rpl));
+ skb_get(skb);
+ mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
+ wscale = compute_wscale(rcv_win);
+ opt0h = V_NAGLE(0) |
+ V_NO_CONG(nocong) |
+ V_KEEP_ALIVE(1) |
+ F_TCAM_BYPASS |
+ V_WND_SCALE(wscale) |
+ V_MSS_IDX(mtu_idx) |
+ V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
+ opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
+ opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+
+ rpl = cplhdr(skb);
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
+ rpl->peer_ip = peer_ip;
+ rpl->opt0h = htonl(opt0h);
+ rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
+ rpl->opt2 = htonl(opt2);
+ rpl->rsvd = rpl->opt2; /* workaround for HW bug */
+ skb->priority = CPL_PRIORITY_SETUP;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+
+ return;
+}
+
+static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
+ struct sk_buff *skb)
+{
+ PDBG("%s t3cdev %p tid %u peer_ip %x\n", __FUNCTION__, tdev, hwtid,
+ peer_ip);
+ BUG_ON(skb_cloned(skb));
+ skb_trim(skb, sizeof(struct cpl_tid_release));
+ skb_get(skb);
+
+ if (tdev->type == T3B)
+ release_tid(tdev, hwtid, skb);
+ else {
+ struct cpl_pass_accept_rpl *rpl;
+
+ rpl = cplhdr(skb);
+ skb->priority = CPL_PRIORITY_SETUP;
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ hwtid));
+ rpl->peer_ip = peer_ip;
+ rpl->opt0h = htonl(F_TCAM_BYPASS);
+ rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
+ rpl->opt2 = 0;
+ rpl->rsvd = rpl->opt2;
+ tdev->send(tdev, skb);
+ }
+}
+
+static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *child_ep, *parent_ep = ctx;
+ struct cpl_pass_accept_req *req = cplhdr(skb);
+ unsigned int hwtid = GET_TID(req);
+ struct dst_entry *dst;
+ struct l2t_entry *l2t;
+ struct rtable *rt;
+ struct iff_mac tim;
+
+ PDBG("%s parent ep %p tid %u\n", __FUNCTION__, parent_ep, hwtid);
+
+ if (state_read(&parent_ep->com) != LISTEN) {
+ printk(KERN_ERR "%s - listening ep not in LISTEN\n",
+ __FUNCTION__);
+ goto reject;
+ }
+
+ /*
+ * Find the netdev for this connection request.
+ */
+ tim.mac_addr = req->dst_mac;
+ tim.vlan_tag = ntohs(req->vlan_tag);
+ if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
+ printk(KERN_ERR
+ "%s bad dst mac %02x %02x %02x %02x %02x %02x\n",
+ __FUNCTION__,
+ req->dst_mac[0],
+ req->dst_mac[1],
+ req->dst_mac[2],
+ req->dst_mac[3],
+ req->dst_mac[4],
+ req->dst_mac[5]);
+ goto reject;
+ }
+
+ /* Find output route */
+ rt = find_route(tdev,
+ req->local_ip,
+ req->peer_ip,
+ req->local_port,
+ req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
+ if (!rt) {
+ printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
+ __FUNCTION__);
+ goto reject;
+ }
+ dst = &rt->u.dst;
+ l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
+ if (!l2t) {
+ printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
+ __FUNCTION__);
+ dst_release(dst);
+ goto reject;
+ }
+ child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
+ if (!child_ep) {
+ printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
+ __FUNCTION__);
+ l2t_release(L2DATA(tdev), l2t);
+ dst_release(dst);
+ goto reject;
+ }
+ state_set(&child_ep->com, CONNECTING);
+ child_ep->com.tdev = tdev;
+ child_ep->com.cm_id = NULL;
+ child_ep->com.local_addr.sin_family = PF_INET;
+ child_ep->com.local_addr.sin_port = req->local_port;
+ child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
+ child_ep->com.remote_addr.sin_family = PF_INET;
+ child_ep->com.remote_addr.sin_port = req->peer_port;
+ child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
+ get_ep(&parent_ep->com);
+ child_ep->parent_ep = parent_ep;
+ child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
+ child_ep->l2t = l2t;
+ child_ep->dst = dst;
+ child_ep->hwtid = hwtid;
+ init_timer(&child_ep->timer);
+ cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
+ accept_cr(child_ep, req->peer_ip, skb);
+ goto out;
+reject:
+ reject_cr(tdev, hwtid, req->peer_ip, skb);
+out:
+ return CPL_RET_BUF_DONE;
+}
+
+static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_pass_establish *req = cplhdr(skb);
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ ep->snd_seq = ntohl(req->snd_isn);
+
+ set_emss(ep, ntohs(req->tcp_opt));
+
+ dst_confirm(ep->dst);
+ state_set(&ep->com, MPA_REQ_WAIT);
+ start_ep_timer(ep);
+
+ return CPL_RET_BUF_DONE;
+}
+
+static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct iwch_qp_attributes attrs;
+ unsigned long flags;
+ int disconnect = 1;
+ int release = 0;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ dst_confirm(ep->dst);
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ __state_set(&ep->com, CLOSING);
+ break;
+ case MPA_REQ_SENT:
+ __state_set(&ep->com, CLOSING);
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ __state_set(&ep->com, CLOSING);
+ get_ep(&ep->com);
+ break;
+ case MPA_REP_SENT:
+ __state_set(&ep->com, CLOSING);
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
+ break;
+ case FPDU_MODE:
+ __state_set(&ep->com, CLOSING);
+ attrs.next_state = IWCH_QP_STATE_CLOSING;
+ iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+ peer_close_upcall(ep);
+ break;
+ case ABORTING:
+ disconnect = 0;
+ break;
+ case CLOSING:
+ start_ep_timer(ep);
+ __state_set(&ep->com, MORIBUND);
+ disconnect = 0;
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_IDLE;
+ iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+ }
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ disconnect = 0;
+ break;
+ case DEAD:
+ disconnect = 0;
+ break;
+ default:
+ BUG_ON(1);
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (disconnect)
+ iwch_ep_disconnect(ep, 0, GFP_KERNEL);
+ if (release)
+ release_ep_resources(ep);
+ return CPL_RET_BUF_DONE;
+}
+
+/*
+ * Returns whether an ABORT_REQ_RSS message is a negative advice.
+ */
+static inline int is_neg_adv_abort(unsigned int status)
+{
+ return status == CPL_ERR_RTX_NEG_ADVICE ||
+ status == CPL_ERR_PERSIST_NEG_ADVICE;
+}
+
+static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct cpl_abort_req_rss *req = cplhdr(skb);
+ struct iwch_ep *ep = ctx;
+ struct cpl_abort_rpl *rpl;
+ struct sk_buff *rpl_skb;
+ struct iwch_qp_attributes attrs;
+ int ret;
+ int state;
+
+ if (is_neg_adv_abort(req->status)) {
+ PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
+ ep->hwtid);
+ t3_l2t_send_event(ep->com.tdev, ep->l2t);
+ return CPL_RET_BUF_DONE;
+ }
+
+ state = state_read(&ep->com);
+ PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
+ switch (state) {
+ case CONNECTING:
+ break;
+ case MPA_REQ_WAIT:
+ break;
+ case MPA_REQ_SENT:
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+ case MPA_REP_SENT:
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
+ break;
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ get_ep(&ep->com);
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ case FPDU_MODE:
+ case CLOSING:
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ ret = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (ret)
+ printk(KERN_ERR MOD
+ "%s - qp <- error failed!\n",
+ __FUNCTION__);
+ }
+ peer_abort_upcall(ep);
+ break;
+ case ABORTING:
+ break;
+ case DEAD:
+ PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __FUNCTION__);
+ return CPL_RET_BUF_DONE;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ dst_confirm(ep->dst);
+
+ rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
+ if (!rpl_skb) {
+ printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
+ __FUNCTION__);
+ dst_release(ep->dst);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ put_ep(&ep->com);
+ return CPL_RET_BUF_DONE;
+ }
+ rpl_skb->priority = CPL_PRIORITY_DATA;
+ rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
+ rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
+ rpl->cmd = CPL_ABORT_NO_RST;
+ ep->com.tdev->send(ep->com.tdev, rpl_skb);
+ if (state != ABORTING) {
+ state_set(&ep->com, DEAD);
+ release_ep_resources(ep);
+ }
+ return CPL_RET_BUF_DONE;
+}
+
+static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct iwch_qp_attributes attrs;
+ unsigned long flags;
+ int release = 0;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ BUG_ON(!ep);
+
+ /* The cm_id may be null if we failed to connect */
+ spin_lock_irqsave(&ep->com.lock, flags);
+ switch (ep->com.state) {
+ case CLOSING:
+ start_ep_timer(ep);
+ __state_set(&ep->com, MORIBUND);
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ if ((ep->com.cm_id) && (ep->com.qp)) {
+ attrs.next_state = IWCH_QP_STATE_IDLE;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ case DEAD:
+ default:
+ BUG_ON(1);
+ break;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (release)
+ release_ep_resources(ep);
+ return CPL_RET_BUF_DONE;
+}
+
+/*
+ * T3A does 3 things when a TERM is received:
+ * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
+ * 2) generate an async event on the QP with the TERMINATE opcode
+ * 3) post a TERMINATE opcde cqe into the associated CQ.
+ *
+ * For (1), we save the message in the qp for later consumer consumption.
+ * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
+ * For (3), we toss the CQE in cxio_poll_cq().
+ *
+ * terminate() handles case (1)...
+ */
+static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ skb_pull(skb, sizeof(struct cpl_rdma_terminate));
+ PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
+ memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len);
+ ep->com.qp->attr.terminate_msg_len = skb->len;
+ ep->com.qp->attr.is_terminate_local = 0;
+ return CPL_RET_BUF_DONE;
+}
+
+static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct cpl_rdma_ec_status *rep = cplhdr(skb);
+ struct iwch_ep *ep = ctx;
+
+ PDBG("%s ep %p tid %u status %d\n", __FUNCTION__, ep, ep->hwtid,
+ rep->status);
+ if (rep->status) {
+ struct iwch_qp_attributes attrs;
+
+ printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
+ __FUNCTION__, ep->hwtid);
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ abort_connection(ep, NULL, GFP_KERNEL);
+ }
+ return CPL_RET_BUF_DONE;
+}
+
+static void ep_timeout(unsigned long arg)
+{
+ struct iwch_ep *ep = (struct iwch_ep *)arg;
+ struct iwch_qp_attributes attrs;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+ PDBG("%s ep %p tid %u state %d\n", __FUNCTION__, ep, ep->hwtid,
+ ep->com.state);
+ switch (ep->com.state) {
+ case MPA_REQ_SENT:
+ connect_reply_upcall(ep, -ETIMEDOUT);
+ break;
+ case MPA_REQ_WAIT:
+ break;
+ case MORIBUND:
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ break;
+ default:
+ BUG();
+ }
+ __state_set(&ep->com, CLOSING);
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ abort_connection(ep, NULL, GFP_ATOMIC);
+ put_ep(&ep->com);
+}
+
+int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ int err;
+ struct iwch_ep *ep = to_ep(cm_id);
+ PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
+
+ if (state_read(&ep->com) == DEAD) {
+ put_ep(&ep->com);
+ return -ECONNRESET;
+ }
+ BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ state_set(&ep->com, CLOSING);
+ if (mpa_rev == 0)
+ abort_connection(ep, NULL, GFP_KERNEL);
+ else {
+ err = send_mpa_reject(ep, pdata, pdata_len);
+ err = send_halfclose(ep, GFP_KERNEL);
+ }
+ return 0;
+}
+
+int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err;
+ struct iwch_qp_attributes attrs;
+ enum iwch_qp_attr_mask mask;
+ struct iwch_ep *ep = to_ep(cm_id);
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
+
+ PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
+ if (state_read(&ep->com) == DEAD) {
+ put_ep(&ep->com);
+ return -ECONNRESET;
+ }
+
+ BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(!qp);
+
+ if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
+ (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
+ abort_connection(ep, NULL, GFP_KERNEL);
+ return -EINVAL;
+ }
+
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = qp;
+
+ ep->com.rpl_done = 0;
+ ep->com.rpl_err = 0;
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+ PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
+ get_ep(&ep->com);
+ err = send_mpa_reply(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ if (err) {
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ cm_id->rem_ref(cm_id);
+ abort_connection(ep, NULL, GFP_KERNEL);
+ put_ep(&ep->com);
+ return err;
+ }
+
+ /* bind QP to EP and move to RTS */
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ord;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = IWCH_QP_STATE_RTS;
+
+ /* bind QP and TID with INIT_WR */
+ mask = IWCH_QP_ATTR_NEXT_STATE |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_MAX_IRD |
+ IWCH_QP_ATTR_MAX_ORD;
+
+ err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+
+ if (err) {
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ cm_id->rem_ref(cm_id);
+ abort_connection(ep, NULL, GFP_KERNEL);
+ } else {
+ state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ }
+ put_ep(&ep->com);
+ return err;
+}
+
+int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err = 0;
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_ep *ep;
+ struct rtable *rt;
+
+ ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+ if (!ep) {
+ printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
+ err = -ENOMEM;
+ goto out;
+ }
+ init_timer(&ep->timer);
+ ep->plen = conn_param->private_data_len;
+ if (ep->plen)
+ memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
+ conn_param->private_data, ep->plen);
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+ ep->com.tdev = h->rdev.t3cdev_p;
+
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = get_qhp(h, conn_param->qpn);
+ BUG_ON(!ep->com.qp);
+ PDBG("%s qpn 0x%x qp %p cm_id %p\n", __FUNCTION__, conn_param->qpn,
+ ep->com.qp, cm_id);
+
+ /*
+ * Allocate an active TID to initiate a TCP connection.
+ */
+ ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
+ if (ep->atid == -1) {
+ printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__);
+ err = -ENOMEM;
+ goto fail2;
+ }
+
+ /* find a route */
+ rt = find_route(h->rdev.t3cdev_p,
+ cm_id->local_addr.sin_addr.s_addr,
+ cm_id->remote_addr.sin_addr.s_addr,
+ cm_id->local_addr.sin_port,
+ cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
+ if (!rt) {
+ printk(KERN_ERR MOD "%s - cannot find route.\n", __FUNCTION__);
+ err = -EHOSTUNREACH;
+ goto fail3;
+ }
+ ep->dst = &rt->u.dst;
+
+ /* get a l2t entry */
+ ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
+ ep->dst->neighbour->dev);
+ if (!ep->l2t) {
+ printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __FUNCTION__);
+ err = -ENOMEM;
+ goto fail4;
+ }
+
+ state_set(&ep->com, CONNECTING);
+ ep->tos = IPTOS_LOWDELAY;
+ ep->com.local_addr = cm_id->local_addr;
+ ep->com.remote_addr = cm_id->remote_addr;
+
+ /* send connect request to rnic */
+ err = send_connect(ep);
+ if (!err)
+ goto out;
+
+ l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t);
+fail4:
+ dst_release(ep->dst);
+fail3:
+ cxgb3_free_atid(ep->com.tdev, ep->atid);
+fail2:
+ put_ep(&ep->com);
+out:
+ return err;
+}
+
+int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ int err = 0;
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_listen_ep *ep;
+
+
+ might_sleep();
+
+ ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+ if (!ep) {
+ printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
+ err = -ENOMEM;
+ goto fail1;
+ }
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+ ep->com.tdev = h->rdev.t3cdev_p;
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->backlog = backlog;
+ ep->com.local_addr = cm_id->local_addr;
+
+ /*
+ * Allocate a server TID.
+ */
+ ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
+ if (ep->stid == -1) {
+ printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__);
+ err = -ENOMEM;
+ goto fail2;
+ }
+
+ state_set(&ep->com, LISTEN);
+ err = listen_start(ep);
+ if (err)
+ goto fail3;
+
+ /* wait for pass_open_rpl */
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ err = ep->com.rpl_err;
+ if (!err) {
+ cm_id->provider_data = ep;
+ goto out;
+ }
+fail3:
+ cxgb3_free_stid(ep->com.tdev, ep->stid);
+fail2:
+ put_ep(&ep->com);
+fail1:
+out:
+ return err;
+}
+
+int iwch_destroy_listen(struct iw_cm_id *cm_id)
+{
+ int err;
+ struct iwch_listen_ep *ep = to_listen_ep(cm_id);
+
+ PDBG("%s ep %p\n", __FUNCTION__, ep);
+
+ might_sleep();
+ state_set(&ep->com, DEAD);
+ ep->com.rpl_done = 0;
+ ep->com.rpl_err = 0;
+ err = listen_stop(ep);
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ cxgb3_free_stid(ep->com.tdev, ep->stid);
+ err = ep->com.rpl_err;
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+ return err;
+}
+
+int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
+{
+ int ret=0;
+ unsigned long flags;
+ int close = 0;
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+
+ PDBG("%s ep %p state %s, abrupt %d\n", __FUNCTION__, ep,
+ states[ep->com.state], abrupt);
+
+ if (ep->com.state == DEAD) {
+ PDBG("%s already dead ep %p\n", __FUNCTION__, ep);
+ goto out;
+ }
+
+ if (abrupt) {
+ if (ep->com.state != ABORTING) {
+ ep->com.state = ABORTING;
+ close = 1;
+ }
+ goto out;
+ }
+
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ case MPA_REQ_SENT:
+ case MPA_REQ_RCVD:
+ case MPA_REP_SENT:
+ case FPDU_MODE:
+ ep->com.state = CLOSING;
+ close = 1;
+ break;
+ case CLOSING:
+ start_ep_timer(ep);
+ ep->com.state = MORIBUND;
+ close = 1;
+ break;
+ case MORIBUND:
+ break;
+ default:
+ BUG();
+ break;
+ }
+out:
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (close) {
+ if (abrupt)
+ ret = send_abort(ep, NULL, gfp);
+ else
+ ret = send_halfclose(ep, gfp);
+ }
+ return ret;
+}
+
+int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
+ struct l2t_entry *l2t)
+{
+ struct iwch_ep *ep = ctx;
+
+ if (ep->dst != old)
+ return 0;
+
+ PDBG("%s ep %p redirect to dst %p l2t %p\n", __FUNCTION__, ep, new,
+ l2t);
+ dst_hold(new);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ ep->l2t = l2t;
+ dst_release(old);
+ ep->dst = new;
+ return 1;
+}
+
+/*
+ * All the CM events are handled on a work queue to have a safe context.
+ */
+static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep_common *epc = ctx;
+
+ get_ep(epc);
+
+ /*
+ * Save ctx and tdev in the skb->cb area.
+ */
+ *((void **) skb->cb) = ctx;
+ *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
+
+ /*
+ * Queue the skb and schedule the worker thread.
+ */
+ skb_queue_tail(&rxq, skb);
+ queue_work(workq, &skb_work);
+ return 0;
+}
+
+int __init iwch_cm_init(void)
+{
+ skb_queue_head_init(&rxq);
+
+ workq = create_singlethread_workqueue("iw_cxgb3");
+ if (!workq)
+ return -ENOMEM;
+
+ /*
+ * All upcalls from the T3 Core go to sched() to
+ * schedule the processing on a work queue.
+ */
+ t3c_handlers[CPL_ACT_ESTABLISH] = sched;
+ t3c_handlers[CPL_ACT_OPEN_RPL] = sched;
+ t3c_handlers[CPL_RX_DATA] = sched;
+ t3c_handlers[CPL_TX_DMA_ACK] = sched;
+ t3c_handlers[CPL_ABORT_RPL_RSS] = sched;
+ t3c_handlers[CPL_ABORT_RPL] = sched;
+ t3c_handlers[CPL_PASS_OPEN_RPL] = sched;
+ t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched;
+ t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched;
+ t3c_handlers[CPL_PASS_ESTABLISH] = sched;
+ t3c_handlers[CPL_PEER_CLOSE] = sched;
+ t3c_handlers[CPL_CLOSE_CON_RPL] = sched;
+ t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
+ t3c_handlers[CPL_RDMA_TERMINATE] = sched;
+ t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
+
+ /*
+ * These are the real handlers that are called from a
+ * work queue.
+ */
+ work_handlers[CPL_ACT_ESTABLISH] = act_establish;
+ work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl;
+ work_handlers[CPL_RX_DATA] = rx_data;
+ work_handlers[CPL_TX_DMA_ACK] = tx_ack;
+ work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl;
+ work_handlers[CPL_ABORT_RPL] = abort_rpl;
+ work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl;
+ work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl;
+ work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req;
+ work_handlers[CPL_PASS_ESTABLISH] = pass_establish;
+ work_handlers[CPL_PEER_CLOSE] = peer_close;
+ work_handlers[CPL_ABORT_REQ_RSS] = peer_abort;
+ work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl;
+ work_handlers[CPL_RDMA_TERMINATE] = terminate;
+ work_handlers[CPL_RDMA_EC_STATUS] = ec_status;
+ return 0;
+}
+
+void __exit iwch_cm_term(void)
+{
+ flush_workqueue(workq);
+ destroy_workqueue(workq);
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
new file mode 100644
index 000000000000..0c6f281bd4a0
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWCH_CM_H_
+#define _IWCH_CM_H_
+
+#include <linux/inet.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+
+#include "cxgb3_offload.h"
+#include "iwch_provider.h"
+
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+
+#define MPA_MAX_PRIVATE_DATA 256
+#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
+#define MPA_REJECT 0x20
+#define MPA_CRC 0x40
+#define MPA_MARKERS 0x80
+#define MPA_FLAGS_MASK 0xE0
+
+#define put_ep(ep) { \
+ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \
+ ep, atomic_read(&((ep)->kref.refcount))); \
+ kref_put(&((ep)->kref), __free_ep); \
+}
+
+#define get_ep(ep) { \
+ PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
+ ep, atomic_read(&((ep)->kref.refcount))); \
+ kref_get(&((ep)->kref)); \
+}
+
+struct mpa_message {
+ u8 key[16];
+ u8 flags;
+ u8 revision;
+ __be16 private_data_size;
+ u8 private_data[0];
+};
+
+struct terminate_message {
+ u8 layer_etype;
+ u8 ecode;
+ __be16 hdrct_rsvd;
+ u8 len_hdrs[0];
+};
+
+#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
+
+enum iwch_layers_types {
+ LAYER_RDMAP = 0x00,
+ LAYER_DDP = 0x10,
+ LAYER_MPA = 0x20,
+ RDMAP_LOCAL_CATA = 0x00,
+ RDMAP_REMOTE_PROT = 0x01,
+ RDMAP_REMOTE_OP = 0x02,
+ DDP_LOCAL_CATA = 0x00,
+ DDP_TAGGED_ERR = 0x01,
+ DDP_UNTAGGED_ERR = 0x02,
+ DDP_LLP = 0x03
+};
+
+enum iwch_rdma_ecodes {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_BASE_BOUNDS = 0x01,
+ RDMAP_ACC_VIOL = 0x02,
+ RDMAP_STAG_NOT_ASSOC = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_VERS = 0x05,
+ RDMAP_INV_OPCODE = 0x06,
+ RDMAP_STREAM_CATA = 0x07,
+ RDMAP_GLOBAL_CATA = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum iwch_ddp_ecodes {
+ DDPT_INV_STAG = 0x00,
+ DDPT_BASE_BOUNDS = 0x01,
+ DDPT_STAG_NOT_ASSOC = 0x02,
+ DDPT_TO_WRAP = 0x03,
+ DDPT_INV_VERS = 0x04,
+ DDPU_INV_QN = 0x01,
+ DDPU_INV_MSN_NOBUF = 0x02,
+ DDPU_INV_MSN_RANGE = 0x03,
+ DDPU_INV_MO = 0x04,
+ DDPU_MSG_TOOBIG = 0x05,
+ DDPU_INV_VERS = 0x06
+};
+
+enum iwch_mpa_ecodes {
+ MPA_CRC_ERR = 0x02,
+ MPA_MARKER_ERR = 0x03
+};
+
+enum iwch_ep_state {
+ IDLE = 0,
+ LISTEN,
+ CONNECTING,
+ MPA_REQ_WAIT,
+ MPA_REQ_SENT,
+ MPA_REQ_RCVD,
+ MPA_REP_SENT,
+ FPDU_MODE,
+ ABORTING,
+ CLOSING,
+ MORIBUND,
+ DEAD,
+};
+
+struct iwch_ep_common {
+ struct iw_cm_id *cm_id;
+ struct iwch_qp *qp;
+ struct t3cdev *tdev;
+ enum iwch_ep_state state;
+ struct kref kref;
+ spinlock_t lock;
+ struct sockaddr_in local_addr;
+ struct sockaddr_in remote_addr;
+ wait_queue_head_t waitq;
+ int rpl_done;
+ int rpl_err;
+};
+
+struct iwch_listen_ep {
+ struct iwch_ep_common com;
+ unsigned int stid;
+ int backlog;
+};
+
+struct iwch_ep {
+ struct iwch_ep_common com;
+ struct iwch_ep *parent_ep;
+ struct timer_list timer;
+ unsigned int atid;
+ u32 hwtid;
+ u32 snd_seq;
+ struct l2t_entry *l2t;
+ struct dst_entry *dst;
+ struct sk_buff *mpa_skb;
+ struct iwch_mpa_attributes mpa_attr;
+ unsigned int mpa_pkt_len;
+ u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
+ u8 tos;
+ u16 emss;
+ u16 plen;
+ u32 ird;
+ u32 ord;
+};
+
+static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline int compute_wscale(int win)
+{
+ int wscale = 0;
+
+ while (wscale < 14 && (65535<<wscale) < win)
+ wscale++;
+ return wscale;
+}
+
+/* CM prototypes */
+
+int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
+int iwch_destroy_listen(struct iw_cm_id *cm_id);
+int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
+int iwch_quiesce_tid(struct iwch_ep *ep);
+int iwch_resume_tid(struct iwch_ep *ep);
+void __free_ep(struct kref *kref);
+void iwch_rearp(struct iwch_ep *ep);
+int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
+
+int __init iwch_cm_init(void);
+void __exit iwch_cm_term(void);
+
+#endif /* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
new file mode 100644
index 000000000000..d7624c170ee7
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+
+/*
+ * Get one cq entry from cxio and map it to openib.
+ *
+ * Returns:
+ * 0 EMPTY;
+ * 1 cqe returned
+ * -EAGAIN caller must try again
+ * any other -errno fatal error
+ */
+static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+ struct ib_wc *wc)
+{
+ struct iwch_qp *qhp = NULL;
+ struct t3_cqe cqe, *rd_cqe;
+ struct t3_wq *wq;
+ u32 credit = 0;
+ u8 cqe_flushed;
+ u64 cookie;
+ int ret = 1;
+
+ rd_cqe = cxio_next_cqe(&chp->cq);
+
+ if (!rd_cqe)
+ return 0;
+
+ qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
+ if (!qhp)
+ wq = NULL;
+ else {
+ spin_lock(&qhp->lock);
+ wq = &(qhp->wq);
+ }
+ ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
+ &credit);
+ if (t3a_device(chp->rhp) && credit) {
+ PDBG("%s updating %d cq credits on id %d\n", __FUNCTION__,
+ credit, chp->cq.cqid);
+ cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
+ }
+
+ if (ret) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ ret = 1;
+
+ wc->wr_id = cookie;
+ wc->qp = &qhp->ibqp;
+ wc->vendor_err = CQE_STATUS(cqe);
+
+ PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
+ "lo 0x%x cookie 0x%llx\n", __FUNCTION__,
+ CQE_QPID(cqe), CQE_TYPE(cqe),
+ CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
+ CQE_WRID_LOW(cqe), (unsigned long long) cookie);
+
+ if (CQE_TYPE(cqe) == 0) {
+ if (!CQE_STATUS(cqe))
+ wc->byte_len = CQE_LEN(cqe);
+ else
+ wc->byte_len = 0;
+ wc->opcode = IB_WC_RECV;
+ } else {
+ switch (CQE_OPCODE(cqe)) {
+ case T3_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case T3_READ_REQ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = CQE_LEN(cqe);
+ break;
+ case T3_SEND:
+ case T3_SEND_WITH_SE:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case T3_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+
+ /* these aren't supported yet */
+ case T3_SEND_WITH_INV:
+ case T3_SEND_WITH_SE_INV:
+ case T3_LOCAL_INV:
+ case T3_FAST_REGISTER:
+ default:
+ printk(KERN_ERR MOD "Unexpected opcode %d "
+ "in the CQE received for QPID=0x%0x\n",
+ CQE_OPCODE(cqe), CQE_QPID(cqe));
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (cqe_flushed)
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ else {
+
+ switch (CQE_STATUS(cqe)) {
+ case TPT_ERR_SUCCESS:
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case TPT_ERR_STAG:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case TPT_ERR_PDID:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case TPT_ERR_QPID:
+ case TPT_ERR_ACCESS:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case TPT_ERR_WRAP:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ case TPT_ERR_BOUND:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case TPT_ERR_CRC:
+ case TPT_ERR_MARKER:
+ case TPT_ERR_PDU_LEN_ERR:
+ case TPT_ERR_OUT_OF_RQE:
+ case TPT_ERR_DDP_VERSION:
+ case TPT_ERR_RDMA_VERSION:
+ case TPT_ERR_DDP_QUEUE_NUM:
+ case TPT_ERR_MSN:
+ case TPT_ERR_TBIT:
+ case TPT_ERR_MO:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_IRD_OVERFLOW:
+ case TPT_ERR_OPCODE:
+ wc->status = IB_WC_FATAL_ERR;
+ break;
+ case TPT_ERR_SWFLUSH:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ default:
+ printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
+ "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
+ ret = -EINVAL;
+ }
+ }
+out:
+ if (wq)
+ spin_unlock(&qhp->lock);
+ return ret;
+}
+
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ unsigned long flags;
+ int npolled;
+ int err = 0;
+
+ chp = to_iwch_cq(ibcq);
+ rhp = chp->rhp;
+
+ spin_lock_irqsave(&chp->lock, flags);
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+#ifdef DEBUG
+ int i=0;
+#endif
+
+ /*
+ * Because T3 can post CQEs that are _not_ associated
+ * with a WR, we might have to poll again after removing
+ * one of these.
+ */
+ do {
+ err = iwch_poll_cq_one(rhp, chp, wc + npolled);
+#ifdef DEBUG
+ BUG_ON(++i > 1000);
+#endif
+ } while (err == -EAGAIN);
+ if (err <= 0)
+ break;
+ }
+ spin_unlock_irqrestore(&chp->lock, flags);
+
+ if (err < 0)
+ return err;
+ else {
+ return npolled;
+ }
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
new file mode 100644
index 000000000000..54362afbf72f
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <net/sock.h>
+#include "iwch_provider.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+#include "cxio_hal.h"
+#include "cxio_wr.h"
+
+static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
+ struct respQ_msg_t *rsp_msg,
+ enum ib_event_type ib_event,
+ int send_term)
+{
+ struct ib_event event;
+ struct iwch_qp_attributes attrs;
+ struct iwch_qp *qhp;
+
+ printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+ CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+ CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+
+ spin_lock(&rnicp->lock);
+ qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
+
+ if (!qhp) {
+ printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
+ __FUNCTION__, CQE_STATUS(rsp_msg->cqe),
+ CQE_QPID(rsp_msg->cqe));
+ spin_unlock(&rnicp->lock);
+ return;
+ }
+
+ if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
+ (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
+ PDBG("%s AE received after RTS - "
+ "qp state %d qpid 0x%x status 0x%x\n", __FUNCTION__,
+ qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+ spin_unlock(&rnicp->lock);
+ return;
+ }
+
+ atomic_inc(&qhp->refcnt);
+ spin_unlock(&rnicp->lock);
+
+ event.event = ib_event;
+ event.device = chp->ibcq.device;
+ if (ib_event == IB_EVENT_CQ_ERR)
+ event.element.cq = &chp->ibcq;
+ else
+ event.element.qp = &qhp->ibqp;
+
+ if (qhp->ibqp.event_handler)
+ (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
+
+ if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+ attrs.next_state = IWCH_QP_STATE_TERMINATE;
+ iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (send_term)
+ iwch_post_terminate(qhp, rsp_msg);
+ }
+
+ if (atomic_dec_and_test(&qhp->refcnt))
+ wake_up(&qhp->wait);
+}
+
+void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
+{
+ struct iwch_dev *rnicp;
+ struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
+ struct iwch_cq *chp;
+ struct iwch_qp *qhp;
+ u32 cqid = RSPQ_CQID(rsp_msg);
+
+ rnicp = (struct iwch_dev *) rdev_p->ulp;
+ spin_lock(&rnicp->lock);
+ chp = get_chp(rnicp, cqid);
+ qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
+ if (!chp || !qhp) {
+ printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
+ "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
+ cqid, CQE_QPID(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
+ CQE_WRID_LOW(rsp_msg->cqe));
+ spin_unlock(&rnicp->lock);
+ goto out;
+ }
+ iwch_qp_add_ref(&qhp->ibqp);
+ atomic_inc(&chp->refcnt);
+ spin_unlock(&rnicp->lock);
+
+ /*
+ * 1) completion of our sending a TERMINATE.
+ * 2) incoming TERMINATE message.
+ */
+ if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
+ (CQE_STATUS(rsp_msg->cqe) == 0)) {
+ if (SQ_TYPE(rsp_msg->cqe)) {
+ PDBG("%s QPID 0x%x ep %p disconnecting\n",
+ __FUNCTION__, qhp->wq.qpid, qhp->ep);
+ iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
+ } else {
+ PDBG("%s post REQ_ERR AE QPID 0x%x\n", __FUNCTION__,
+ qhp->wq.qpid);
+ post_qp_event(rnicp, chp, rsp_msg,
+ IB_EVENT_QP_REQ_ERR, 0);
+ iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
+ }
+ goto done;
+ }
+
+ /* Bad incoming Read request */
+ if (SQ_TYPE(rsp_msg->cqe) &&
+ (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+ goto done;
+ }
+
+ /* Bad incoming write */
+ if (RQ_TYPE(rsp_msg->cqe) &&
+ (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+ goto done;
+ }
+
+ switch (CQE_STATUS(rsp_msg->cqe)) {
+
+ /* Completion Events */
+ case TPT_ERR_SUCCESS:
+
+ /*
+ * Confirm the destination entry if this is a RECV completion.
+ */
+ if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
+ dst_confirm(qhp->ep->dst);
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ break;
+
+ case TPT_ERR_STAG:
+ case TPT_ERR_PDID:
+ case TPT_ERR_QPID:
+ case TPT_ERR_ACCESS:
+ case TPT_ERR_WRAP:
+ case TPT_ERR_BOUND:
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+ CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+ CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
+ break;
+
+ /* Device Fatal Errors */
+ case TPT_ERR_ECC:
+ case TPT_ERR_ECC_PSTAG:
+ case TPT_ERR_INTERNAL_ERR:
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
+ break;
+
+ /* QP Fatal Errors */
+ case TPT_ERR_OUT_OF_RQE:
+ case TPT_ERR_PBL_ADDR_BOUND:
+ case TPT_ERR_CRC:
+ case TPT_ERR_MARKER:
+ case TPT_ERR_PDU_LEN_ERR:
+ case TPT_ERR_DDP_VERSION:
+ case TPT_ERR_RDMA_VERSION:
+ case TPT_ERR_OPCODE:
+ case TPT_ERR_DDP_QUEUE_NUM:
+ case TPT_ERR_MSN:
+ case TPT_ERR_TBIT:
+ case TPT_ERR_MO:
+ case TPT_ERR_MSN_GAP:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_RQE_ADDR_BOUND:
+ case TPT_ERR_IRD_OVERFLOW:
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+ break;
+
+ default:
+ printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
+ CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+ break;
+ }
+done:
+ if (atomic_dec_and_test(&chp->refcnt))
+ wake_up(&chp->wait);
+ iwch_qp_rem_ref(&qhp->ibqp);
+out:
+ dev_kfree_skb_irq(skb);
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
new file mode 100644
index 000000000000..a6c2c4ba29e6
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <asm/byteorder.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+
+#include "cxio_hal.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list)
+{
+ u32 stag;
+ u32 mmid;
+
+
+ if (cxio_register_phys_mem(&rhp->rdev,
+ &stag, mhp->attr.pdid,
+ mhp->attr.perms,
+ mhp->attr.zbva,
+ mhp->attr.va_fbo,
+ mhp->attr.len,
+ shift-12,
+ page_list,
+ &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ return -ENOMEM;
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
+ return 0;
+}
+
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list,
+ int npages)
+{
+ u32 stag;
+ u32 mmid;
+
+
+ /* We could support this... */
+ if (npages > mhp->attr.pbl_size)
+ return -ENOMEM;
+
+ stag = mhp->attr.stag;
+ if (cxio_reregister_phys_mem(&rhp->rdev,
+ &stag, mhp->attr.pdid,
+ mhp->attr.perms,
+ mhp->attr.zbva,
+ mhp->attr.va_fbo,
+ mhp->attr.len,
+ shift-12,
+ page_list,
+ &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ return -ENOMEM;
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
+ return 0;
+}
+
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *total_size,
+ int *npages,
+ int *shift,
+ __be64 **page_list)
+{
+ u64 mask;
+ int i, j, n;
+
+ mask = 0;
+ *total_size = 0;
+ for (i = 0; i < num_phys_buf; ++i) {
+ if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
+ return -EINVAL;
+ if (i != 0 && i != num_phys_buf - 1 &&
+ (buffer_list[i].size & ~PAGE_MASK))
+ return -EINVAL;
+ *total_size += buffer_list[i].size;
+ if (i > 0)
+ mask |= buffer_list[i].addr;
+ }
+
+ if (*total_size > 0xFFFFFFFFULL)
+ return -ENOMEM;
+
+ /* Find largest page shift we can use to cover buffers */
+ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
+ if (num_phys_buf > 1) {
+ if ((1ULL << *shift) & mask)
+ break;
+ } else
+ if (1ULL << *shift >=
+ buffer_list[0].size +
+ (buffer_list[0].addr & ((1ULL << *shift) - 1)))
+ break;
+
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
+ buffer_list[0].addr &= ~0ull << *shift;
+
+ *npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ *npages += (buffer_list[i].size +
+ (1ULL << *shift) - 1) >> *shift;
+
+ if (!*npages)
+ return -EINVAL;
+
+ *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
+ if (!*page_list)
+ return -ENOMEM;
+
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
+ ++j)
+ (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
+ ((u64) j << *shift));
+
+ PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __FUNCTION__, (unsigned long long) *iova_start,
+ (unsigned long long) mask, *shift, (unsigned long long) *total_size,
+ *npages);
+
+ return 0;
+
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
new file mode 100644
index 000000000000..2aef122f9955
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -0,0 +1,1202 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ethtool.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "cxio_hal.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+#include "iwch_cm.h"
+#include "iwch_user.h"
+
+static int iwch_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ return -ENOSYS;
+}
+
+static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static int iwch_ah_destroy(struct ib_ah *ah)
+{
+ return -ENOSYS;
+}
+
+static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return -ENOSYS;
+}
+
+static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return -ENOSYS;
+}
+
+static int iwch_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ return -ENOSYS;
+}
+
+static int iwch_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct iwch_dev *rhp = to_iwch_dev(context->device);
+ struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
+ struct iwch_mm_entry *mm, *tmp;
+
+ PDBG("%s context %p\n", __FUNCTION__, context);
+ list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
+ kfree(mm);
+ cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
+ kfree(ucontext);
+ return 0;
+}
+
+static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct iwch_ucontext *context;
+ struct iwch_dev *rhp = to_iwch_dev(ibdev);
+
+ PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+ cxio_init_ucontext(&rhp->rdev, &context->uctx);
+ INIT_LIST_HEAD(&context->mmaps);
+ spin_lock_init(&context->mmap_lock);
+ return &context->ibucontext;
+}
+
+static int iwch_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct iwch_cq *chp;
+
+ PDBG("%s ib_cq %p\n", __FUNCTION__, ib_cq);
+ chp = to_iwch_cq(ib_cq);
+
+ remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+ atomic_dec(&chp->refcnt);
+ wait_event(chp->wait, !atomic_read(&chp->refcnt));
+
+ cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+ kfree(chp);
+ return 0;
+}
+
+static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries,
+ struct ib_ucontext *ib_context,
+ struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ struct iwch_create_cq_resp uresp;
+ struct iwch_create_cq_req ureq;
+ struct iwch_ucontext *ucontext = NULL;
+
+ PDBG("%s ib_dev %p entries %d\n", __FUNCTION__, ibdev, entries);
+ rhp = to_iwch_dev(ibdev);
+ chp = kzalloc(sizeof(*chp), GFP_KERNEL);
+ if (!chp)
+ return ERR_PTR(-ENOMEM);
+
+ if (ib_context) {
+ ucontext = to_iwch_ucontext(ib_context);
+ if (!t3a_device(rhp)) {
+ if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
+ kfree(chp);
+ return ERR_PTR(-EFAULT);
+ }
+ chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
+ }
+ }
+
+ if (t3a_device(rhp)) {
+
+ /*
+ * T3A: Add some fluff to handle extra CQEs inserted
+ * for various errors.
+ * Additional CQE possibilities:
+ * TERMINATE,
+ * incoming RDMA WRITE Failures
+ * incoming RDMA READ REQUEST FAILUREs
+ * NOTE: We cannot ensure the CQ won't overflow.
+ */
+ entries += 16;
+ }
+ entries = roundup_pow_of_two(entries);
+ chp->cq.size_log2 = ilog2(entries);
+
+ if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+ kfree(chp);
+ return ERR_PTR(-ENOMEM);
+ }
+ chp->rhp = rhp;
+ chp->ibcq.cqe = (1 << chp->cq.size_log2) - 1;
+ spin_lock_init(&chp->lock);
+ atomic_set(&chp->refcnt, 1);
+ init_waitqueue_head(&chp->wait);
+ insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+
+ if (ucontext) {
+ struct iwch_mm_entry *mm;
+
+ mm = kmalloc(sizeof *mm, GFP_KERNEL);
+ if (!mm) {
+ iwch_destroy_cq(&chp->ibcq);
+ return ERR_PTR(-ENOMEM);
+ }
+ uresp.cqid = chp->cq.cqid;
+ uresp.size_log2 = chp->cq.size_log2;
+ spin_lock(&ucontext->mmap_lock);
+ uresp.key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ spin_unlock(&ucontext->mmap_lock);
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ kfree(mm);
+ iwch_destroy_cq(&chp->ibcq);
+ return ERR_PTR(-EFAULT);
+ }
+ mm->key = uresp.key;
+ mm->addr = virt_to_phys(chp->cq.queue);
+ mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
+ sizeof (struct t3_cqe));
+ insert_mmap(ucontext, mm);
+ }
+ PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
+ chp->cq.cqid, chp, (1 << chp->cq.size_log2),
+ (unsigned long long) chp->cq.dma_addr);
+ return &chp->ibcq;
+}
+
+static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+#ifdef notyet
+ struct iwch_cq *chp = to_iwch_cq(cq);
+ struct t3_cq oldcq, newcq;
+ int ret;
+
+ PDBG("%s ib_cq %p cqe %d\n", __FUNCTION__, cq, cqe);
+
+ /* We don't downsize... */
+ if (cqe <= cq->cqe)
+ return 0;
+
+ /* create new t3_cq with new size */
+ cqe = roundup_pow_of_two(cqe+1);
+ newcq.size_log2 = ilog2(cqe);
+
+ /* Dont allow resize to less than the current wce count */
+ if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
+ return -ENOMEM;
+ }
+
+ /* Quiesce all QPs using this CQ */
+ ret = iwch_quiesce_qps(chp);
+ if (ret) {
+ return ret;
+ }
+
+ ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
+ if (ret) {
+ return ret;
+ }
+
+ /* copy CQEs */
+ memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
+ sizeof(struct t3_cqe));
+
+ /* old iwch_qp gets new t3_cq but keeps old cqid */
+ oldcq = chp->cq;
+ chp->cq = newcq;
+ chp->cq.cqid = oldcq.cqid;
+
+ /* resize new t3_cq to update the HW context */
+ ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
+ if (ret) {
+ chp->cq = oldcq;
+ return ret;
+ }
+ chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
+
+ /* destroy old t3_cq */
+ oldcq.cqid = newcq.cqid;
+ ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
+ if (ret) {
+ printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
+ __FUNCTION__, ret);
+ }
+
+ /* add user hooks here */
+
+ /* resume qps */
+ ret = iwch_resume_qps(chp);
+ return ret;
+#else
+ return -ENOSYS;
+#endif
+}
+
+static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ enum t3_cq_opcode cq_op;
+ int err;
+ unsigned long flag;
+ u32 rptr;
+
+ chp = to_iwch_cq(ibcq);
+ rhp = chp->rhp;
+ if (notify == IB_CQ_SOLICITED)
+ cq_op = CQ_ARM_SE;
+ else
+ cq_op = CQ_ARM_AN;
+ if (chp->user_rptr_addr) {
+ if (get_user(rptr, chp->user_rptr_addr))
+ return -EFAULT;
+ spin_lock_irqsave(&chp->lock, flag);
+ chp->cq.rptr = rptr;
+ } else
+ spin_lock_irqsave(&chp->lock, flag);
+ PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr);
+ err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
+ spin_unlock_irqrestore(&chp->lock, flag);
+ if (err)
+ printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
+ chp->cq.cqid);
+ return err;
+}
+
+static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ int len = vma->vm_end - vma->vm_start;
+ u32 key = vma->vm_pgoff << PAGE_SHIFT;
+ struct cxio_rdev *rdev_p;
+ int ret = 0;
+ struct iwch_mm_entry *mm;
+ struct iwch_ucontext *ucontext;
+
+ PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff,
+ key, len);
+
+ if (vma->vm_start & (PAGE_SIZE-1)) {
+ return -EINVAL;
+ }
+
+ rdev_p = &(to_iwch_dev(context->device)->rdev);
+ ucontext = to_iwch_ucontext(context);
+
+ mm = remove_mmap(ucontext, key, len);
+ if (!mm)
+ return -EINVAL;
+ kfree(mm);
+
+ if ((mm->addr >= rdev_p->rnic_info.udbell_physbase) &&
+ (mm->addr < (rdev_p->rnic_info.udbell_physbase +
+ rdev_p->rnic_info.udbell_len))) {
+
+ /*
+ * Map T3 DB register.
+ */
+ if (vma->vm_flags & VM_READ) {
+ return -EPERM;
+ }
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+ vma->vm_flags &= ~VM_MAYREAD;
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ mm->addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ } else {
+
+ /*
+ * Map WQ or CQ contig dma memory...
+ */
+ ret = remap_pfn_range(vma, vma->vm_start,
+ mm->addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+
+ return ret;
+}
+
+static int iwch_deallocate_pd(struct ib_pd *pd)
+{
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ PDBG("%s ibpd %p pdid 0x%x\n", __FUNCTION__, pd, php->pdid);
+ cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
+ kfree(php);
+ return 0;
+}
+
+static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct iwch_pd *php;
+ u32 pdid;
+ struct iwch_dev *rhp;
+
+ PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+ rhp = (struct iwch_dev *) ibdev;
+ pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
+ if (!pdid)
+ return ERR_PTR(-EINVAL);
+ php = kzalloc(sizeof(*php), GFP_KERNEL);
+ if (!php) {
+ cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
+ return ERR_PTR(-ENOMEM);
+ }
+ php->pdid = pdid;
+ php->rhp = rhp;
+ if (context) {
+ if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
+ iwch_deallocate_pd(&php->ibpd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+ PDBG("%s pdid 0x%0x ptr 0x%p\n", __FUNCTION__, pdid, php);
+ return &php->ibpd;
+}
+
+static int iwch_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mr *mhp;
+ u32 mmid;
+
+ PDBG("%s ib_mr %p\n", __FUNCTION__, ib_mr);
+ /* There can be no memory windows */
+ if (atomic_read(&ib_mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_iwch_mr(ib_mr);
+ rhp = mhp->rhp;
+ mmid = mhp->attr.stag >> 8;
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ if (mhp->kva)
+ kfree((void *) (unsigned long) mhp->kva);
+ PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
+ kfree(mhp);
+ return 0;
+}
+
+static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start)
+{
+ __be64 *page_list;
+ int shift;
+ u64 total_size;
+ int npages;
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mr *mhp;
+ int ret;
+
+ PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+
+ acc = iwch_convert_access(acc);
+
+
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ /* First check that we have enough alignment */
+ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (num_phys_buf > 1 &&
+ ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
+ &total_size, &npages, &shift, &page_list);
+ if (ret)
+ goto err;
+
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+
+ /* NOTE: TPT perms are backwards from BIND WR perms! */
+ mhp->attr.perms = (acc & 0x1) << 3;
+ mhp->attr.perms |= (acc & 0x2) << 1;
+ mhp->attr.perms |= (acc & 0x4) >> 1;
+ mhp->attr.perms |= (acc & 0x8) >> 3;
+
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
+ kfree(page_list);
+ if (ret) {
+ goto err;
+ }
+ return &mhp->ibmr;
+err:
+ kfree(mhp);
+ return ERR_PTR(ret);
+
+}
+
+static int iwch_reregister_phys_mem(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc, u64 * iova_start)
+{
+
+ struct iwch_mr mh, *mhp;
+ struct iwch_pd *php;
+ struct iwch_dev *rhp;
+ int new_acc;
+ __be64 *page_list = NULL;
+ int shift = 0;
+ u64 total_size;
+ int npages;
+ int ret;
+
+ PDBG("%s ib_mr %p ib_pd %p\n", __FUNCTION__, mr, pd);
+
+ /* There can be no memory windows */
+ if (atomic_read(&mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_iwch_mr(mr);
+ rhp = mhp->rhp;
+ php = to_iwch_pd(mr->pd);
+
+ /* make sure we are on the same adapter */
+ if (rhp != php->rhp)
+ return -EINVAL;
+
+ new_acc = mhp->attr.perms;
+
+ memcpy(&mh, mhp, sizeof *mhp);
+
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ php = to_iwch_pd(pd);
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mh.attr.perms = iwch_convert_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS)
+ ret = build_phys_page_list(buffer_list, num_phys_buf,
+ iova_start,
+ &total_size, &npages,
+ &shift, &page_list);
+
+ ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+ kfree(page_list);
+ if (ret) {
+ return ret;
+ }
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ mhp->attr.pdid = php->pdid;
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mhp->attr.perms = acc;
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ mhp->attr.zbva = 0;
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ }
+
+ return 0;
+}
+
+
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+ int acc, struct ib_udata *udata)
+{
+ __be64 *pages;
+ int shift, n, len;
+ int i, j, k;
+ int err = 0;
+ struct ib_umem_chunk *chunk;
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mr *mhp;
+ struct iwch_reg_user_mr_resp uresp;
+
+ PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+ shift = ffs(region->page_size) - 1;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ n = 0;
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ n += chunk->nents;
+
+ pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ acc = iwch_convert_access(acc);
+
+ i = n = 0;
+
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(
+ &chunk->page_list[j]) +
+ region->page_size * k);
+ }
+ }
+
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+ mhp->attr.perms = (acc & 0x1) << 3;
+ mhp->attr.perms |= (acc & 0x2) << 1;
+ mhp->attr.perms |= (acc & 0x4) >> 1;
+ mhp->attr.perms |= (acc & 0x8) >> 3;
+ mhp->attr.va_fbo = region->virt_base;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) region->length;
+ mhp->attr.pbl_size = i;
+ err = iwch_register_mem(rhp, php, mhp, shift, pages);
+ kfree(pages);
+ if (err)
+ goto err;
+
+ if (udata && t3b_device(rhp)) {
+ uresp.pbl_addr = (mhp->attr.pbl_addr -
+ rhp->rdev.rnic_info.pbl_base) >> 3;
+ PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__,
+ uresp.pbl_addr);
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ iwch_dereg_mr(&mhp->ibmr);
+ err = -EFAULT;
+ goto err;
+ }
+ }
+
+ return &mhp->ibmr;
+
+err:
+ kfree(mhp);
+ return ERR_PTR(err);
+}
+
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct ib_phys_buf bl;
+ u64 kva;
+ struct ib_mr *ibmr;
+
+ PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+
+ /*
+ * T3 only supports 32 bits of size.
+ */
+ bl.size = 0xffffffff;
+ bl.addr = 0;
+ kva = 0;
+ ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
+ return ibmr;
+}
+
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+{
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mw *mhp;
+ u32 mmid;
+ u32 stag = 0;
+ int ret;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+ ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
+ if (ret) {
+ kfree(mhp);
+ return ERR_PTR(ret);
+ }
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = TPT_MW;
+ mhp->attr.stag = stag;
+ mmid = (stag) >> 8;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __FUNCTION__, mmid, mhp, stag);
+ return &(mhp->ibmw);
+}
+
+static int iwch_dealloc_mw(struct ib_mw *mw)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mw *mhp;
+ u32 mmid;
+
+ mhp = to_iwch_mw(mw);
+ rhp = mhp->rhp;
+ mmid = (mw->rkey) >> 8;
+ cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ kfree(mhp);
+ PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __FUNCTION__, mw, mmid, mhp);
+ return 0;
+}
+
+static int iwch_destroy_qp(struct ib_qp *ib_qp)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ struct iwch_qp_attributes attrs;
+ struct iwch_ucontext *ucontext;
+
+ qhp = to_iwch_qp(ib_qp);
+ rhp = qhp->rhp;
+
+ if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
+ }
+ wait_event(qhp->wait, !qhp->ep);
+
+ remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
+
+ atomic_dec(&qhp->refcnt);
+ wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
+
+ ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context)
+ : NULL;
+ cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+
+ PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __FUNCTION__,
+ ib_qp, qhp->wq.qpid, qhp);
+ kfree(qhp);
+ return 0;
+}
+
+static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ struct iwch_pd *php;
+ struct iwch_cq *schp;
+ struct iwch_cq *rchp;
+ struct iwch_create_qp_resp uresp;
+ int wqsize, sqsize, rqsize;
+ struct iwch_ucontext *ucontext;
+
+ PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+ if (attrs->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
+ rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
+ if (!schp || !rchp)
+ return ERR_PTR(-EINVAL);
+
+ /* The RQT size must be # of entries + 1 rounded up to a power of two */
+ rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
+ if (rqsize == attrs->cap.max_recv_wr)
+ rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
+
+ /* T3 doesn't support RQT depth < 16 */
+ if (rqsize < 16)
+ rqsize = 16;
+
+ if (rqsize > T3_MAX_RQ_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * NOTE: The SQ and total WQ sizes don't need to be
+ * a power of two. However, all the code assumes
+ * they are. EG: Q_FREECNT() and friends.
+ */
+ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
+ wqsize = roundup_pow_of_two(rqsize + sqsize);
+ PDBG("%s wqsize %d sqsize %d rqsize %d\n", __FUNCTION__,
+ wqsize, sqsize, rqsize);
+ qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
+ if (!qhp)
+ return ERR_PTR(-ENOMEM);
+ qhp->wq.size_log2 = ilog2(wqsize);
+ qhp->wq.rq_size_log2 = ilog2(rqsize);
+ qhp->wq.sq_size_log2 = ilog2(sqsize);
+ ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+ if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
+ kfree(qhp);
+ return ERR_PTR(-ENOMEM);
+ }
+ attrs->cap.max_recv_wr = rqsize - 1;
+ attrs->cap.max_send_wr = sqsize;
+ qhp->rhp = rhp;
+ qhp->attr.pd = php->pdid;
+ qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
+ qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
+ qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
+ qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+ qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
+ qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
+ qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ qhp->attr.next_state = IWCH_QP_STATE_IDLE;
+
+ /*
+ * XXX - These don't get passed in from the openib user
+ * at create time. The CM sets them via a QP modify.
+ * Need to fix... I think the CM should
+ */
+ qhp->attr.enable_rdma_read = 1;
+ qhp->attr.enable_rdma_write = 1;
+ qhp->attr.enable_bind = 1;
+ qhp->attr.max_ord = 1;
+ qhp->attr.max_ird = 1;
+
+ spin_lock_init(&qhp->lock);
+ init_waitqueue_head(&qhp->wait);
+ atomic_set(&qhp->refcnt, 1);
+ insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+ if (udata) {
+
+ struct iwch_mm_entry *mm1, *mm2;
+
+ mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
+ if (!mm1) {
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ if (!mm2) {
+ kfree(mm1);
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ uresp.qpid = qhp->wq.qpid;
+ uresp.size_log2 = qhp->wq.size_log2;
+ uresp.sq_size_log2 = qhp->wq.sq_size_log2;
+ uresp.rq_size_log2 = qhp->wq.rq_size_log2;
+ spin_lock(&ucontext->mmap_lock);
+ uresp.key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.db_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ spin_unlock(&ucontext->mmap_lock);
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ kfree(mm1);
+ kfree(mm2);
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-EFAULT);
+ }
+ mm1->key = uresp.key;
+ mm1->addr = virt_to_phys(qhp->wq.queue);
+ mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
+ insert_mmap(ucontext, mm1);
+ mm2->key = uresp.db_key;
+ mm2->addr = qhp->wq.udb & PAGE_MASK;
+ mm2->len = PAGE_SIZE;
+ insert_mmap(ucontext, mm2);
+ }
+ qhp->ibqp.qp_num = qhp->wq.qpid;
+ init_timer(&(qhp->timer));
+ PDBG("%s sq_num_entries %d, rq_num_entries %d "
+ "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n",
+ __FUNCTION__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
+ 1 << qhp->wq.size_log2);
+ return &qhp->ibqp;
+}
+
+static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ enum iwch_qp_attr_mask mask = 0;
+ struct iwch_qp_attributes attrs;
+
+ PDBG("%s ib_qp %p\n", __FUNCTION__, ibqp);
+
+ /* iwarp does not support the RTR state */
+ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
+ attr_mask &= ~IB_QP_STATE;
+
+ /* Make sure we still have something left to do */
+ if (!attr_mask)
+ return 0;
+
+ memset(&attrs, 0, sizeof attrs);
+ qhp = to_iwch_qp(ibqp);
+ rhp = qhp->rhp;
+
+ attrs.next_state = iwch_convert_state(attr->qp_state);
+ attrs.enable_rdma_read = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ attrs.enable_rdma_write = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+
+ mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
+ mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
+ (IWCH_QP_ATTR_ENABLE_RDMA_READ |
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+ IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
+
+ return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp)
+{
+ PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
+ atomic_inc(&(to_iwch_qp(qp)->refcnt));
+}
+
+void iwch_qp_rem_ref(struct ib_qp *qp)
+{
+ PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
+ if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
+ wake_up(&(to_iwch_qp(qp)->wait));
+}
+
+struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
+{
+ PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn);
+ return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
+}
+
+
+static int iwch_query_pkey(struct ib_device *ibdev,
+ u8 port, u16 index, u16 * pkey)
+{
+ PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+ *pkey = 0;
+ return 0;
+}
+
+static int iwch_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct iwch_dev *dev;
+
+ PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
+ __FUNCTION__, ibdev, port, index, gid);
+ dev = to_iwch_dev(ibdev);
+ BUG_ON(port == 0 || port > 2);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
+ return 0;
+}
+
+static int iwch_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+
+ struct iwch_dev *dev;
+ PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+
+ dev = to_iwch_dev(ibdev);
+ memset(props, 0, sizeof *props);
+ memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+ props->device_cap_flags = dev->device_cap_flags;
+ props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
+ props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
+ props->max_mr_size = ~0ull;
+ props->max_qp = dev->attr.max_qps;
+ props->max_qp_wr = dev->attr.max_wrs;
+ props->max_sge = dev->attr.max_sge_per_wr;
+ props->max_sge_rd = 1;
+ props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
+ props->max_cq = dev->attr.max_cqs;
+ props->max_cqe = dev->attr.max_cqes_per_cq;
+ props->max_mr = dev->attr.max_mem_regs;
+ props->max_pd = dev->attr.max_pds;
+ props->local_ca_ack_delay = 0;
+
+ return 0;
+}
+
+static int iwch_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+ props->max_mtu = IB_MTU_4096;
+ props->lid = 0;
+ props->lmc = 0;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 0;
+ props->port_cap_flags =
+ IB_PORT_CM_SUP |
+ IB_PORT_SNMP_TUNNEL_SUP |
+ IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->qkey_viol_cntr = 0;
+ props->active_width = 2;
+ props->active_speed = 2;
+ props->max_msg_sz = -1;
+
+ return 0;
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+ return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
+
+ PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ return sprintf(buf, "%s\n", info.fw_version);
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
+
+ PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ return sprintf(buf, "%s\n", info.driver);
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ PDBG("%s class dev 0x%p\n", __FUNCTION__, dev);
+ return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
+ dev->rdev.rnic_info.pdev->device);
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *iwch_class_attributes[] = {
+ &class_device_attr_hw_rev,
+ &class_device_attr_fw_ver,
+ &class_device_attr_hca_type,
+ &class_device_attr_board_id
+};
+
+int iwch_register_device(struct iwch_dev *dev)
+{
+ int ret;
+ int i;
+
+ PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
+ strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
+ memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+ memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+ dev->ibdev.owner = THIS_MODULE;
+ dev->device_cap_flags =
+ (IB_DEVICE_ZERO_STAG |
+ IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+ dev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV);
+ dev->ibdev.node_type = RDMA_NODE_RNIC;
+ memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
+ dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
+ dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+ dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
+ dev->ibdev.query_device = iwch_query_device;
+ dev->ibdev.query_port = iwch_query_port;
+ dev->ibdev.modify_port = iwch_modify_port;
+ dev->ibdev.query_pkey = iwch_query_pkey;
+ dev->ibdev.query_gid = iwch_query_gid;
+ dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
+ dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
+ dev->ibdev.mmap = iwch_mmap;
+ dev->ibdev.alloc_pd = iwch_allocate_pd;
+ dev->ibdev.dealloc_pd = iwch_deallocate_pd;
+ dev->ibdev.create_ah = iwch_ah_create;
+ dev->ibdev.destroy_ah = iwch_ah_destroy;
+ dev->ibdev.create_qp = iwch_create_qp;
+ dev->ibdev.modify_qp = iwch_ib_modify_qp;
+ dev->ibdev.destroy_qp = iwch_destroy_qp;
+ dev->ibdev.create_cq = iwch_create_cq;
+ dev->ibdev.destroy_cq = iwch_destroy_cq;
+ dev->ibdev.resize_cq = iwch_resize_cq;
+ dev->ibdev.poll_cq = iwch_poll_cq;
+ dev->ibdev.get_dma_mr = iwch_get_dma_mr;
+ dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
+ dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
+ dev->ibdev.reg_user_mr = iwch_reg_user_mr;
+ dev->ibdev.dereg_mr = iwch_dereg_mr;
+ dev->ibdev.alloc_mw = iwch_alloc_mw;
+ dev->ibdev.bind_mw = iwch_bind_mw;
+ dev->ibdev.dealloc_mw = iwch_dealloc_mw;
+
+ dev->ibdev.attach_mcast = iwch_multicast_attach;
+ dev->ibdev.detach_mcast = iwch_multicast_detach;
+ dev->ibdev.process_mad = iwch_process_mad;
+
+ dev->ibdev.req_notify_cq = iwch_arm_cq;
+ dev->ibdev.post_send = iwch_post_send;
+ dev->ibdev.post_recv = iwch_post_receive;
+
+
+ dev->ibdev.iwcm =
+ (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
+ GFP_KERNEL);
+ dev->ibdev.iwcm->connect = iwch_connect;
+ dev->ibdev.iwcm->accept = iwch_accept_cr;
+ dev->ibdev.iwcm->reject = iwch_reject_cr;
+ dev->ibdev.iwcm->create_listen = iwch_create_listen;
+ dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
+ dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
+ dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
+ dev->ibdev.iwcm->get_qp = iwch_get_qp;
+
+ ret = ib_register_device(&dev->ibdev);
+ if (ret)
+ goto bail1;
+
+ for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
+ ret = class_device_create_file(&dev->ibdev.class_dev,
+ iwch_class_attributes[i]);
+ if (ret) {
+ goto bail2;
+ }
+ }
+ return 0;
+bail2:
+ ib_unregister_device(&dev->ibdev);
+bail1:
+ return ret;
+}
+
+void iwch_unregister_device(struct iwch_dev *dev)
+{
+ int i;
+
+ PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
+ for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
+ class_device_remove_file(&dev->ibdev.class_dev,
+ iwch_class_attributes[i]);
+ ib_unregister_device(&dev->ibdev);
+ return;
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
new file mode 100644
index 000000000000..2af3e93b607f
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_PROVIDER_H__
+#define __IWCH_PROVIDER_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <rdma/ib_verbs.h>
+#include <asm/types.h>
+#include "t3cdev.h"
+#include "iwch.h"
+#include "cxio_wr.h"
+#include "cxio_hal.h"
+
+struct iwch_pd {
+ struct ib_pd ibpd;
+ u32 pdid;
+ struct iwch_dev *rhp;
+};
+
+static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct iwch_pd, ibpd);
+}
+
+struct tpt_attributes {
+ u32 stag;
+ u32 state:1;
+ u32 type:2;
+ u32 rsvd:1;
+ enum tpt_mem_perm perms;
+ u32 remote_invaliate_disable:1;
+ u32 zbva:1;
+ u32 mw_bind_enable:1;
+ u32 page_size:5;
+
+ u32 pdid;
+ u32 qpid;
+ u32 pbl_addr;
+ u32 len;
+ u64 va_fbo;
+ u32 pbl_size;
+};
+
+struct iwch_mr {
+ struct ib_mr ibmr;
+ struct iwch_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+typedef struct iwch_mw iwch_mw_handle;
+
+static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct iwch_mr, ibmr);
+}
+
+struct iwch_mw {
+ struct ib_mw ibmw;
+ struct iwch_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct iwch_mw, ibmw);
+}
+
+struct iwch_cq {
+ struct ib_cq ibcq;
+ struct iwch_dev *rhp;
+ struct t3_cq cq;
+ spinlock_t lock;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+ u32 __user *user_rptr_addr;
+};
+
+static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct iwch_cq, ibcq);
+}
+
+enum IWCH_QP_FLAGS {
+ QP_QUIESCED = 0x01
+};
+
+struct iwch_mpa_attributes {
+ u8 recv_marker_enabled;
+ u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
+ u8 crc_enabled;
+ u8 version; /* 0 or 1 */
+};
+
+struct iwch_qp_attributes {
+ u32 scq;
+ u32 rcq;
+ u32 sq_num_entries;
+ u32 rq_num_entries;
+ u32 sq_max_sges;
+ u32 sq_max_sges_rdma_write;
+ u32 rq_max_sges;
+ u32 state;
+ u8 enable_rdma_read;
+ u8 enable_rdma_write; /* enable inbound Read Resp. */
+ u8 enable_bind;
+ u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */
+ /*
+ * Next QP state. If specify the current state, only the
+ * QP attributes will be modified.
+ */
+ u32 max_ord;
+ u32 max_ird;
+ u32 pd; /* IN */
+ u32 next_state;
+ char terminate_buffer[52];
+ u32 terminate_msg_len;
+ u8 is_terminate_local;
+ struct iwch_mpa_attributes mpa_attr; /* IN-OUT */
+ struct iwch_ep *llp_stream_handle;
+ char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */
+ u32 stream_msg_buf_len; /* Only on Idle -> RTS */
+};
+
+struct iwch_qp {
+ struct ib_qp ibqp;
+ struct iwch_dev *rhp;
+ struct iwch_ep *ep;
+ struct iwch_qp_attributes attr;
+ struct t3_wq wq;
+ spinlock_t lock;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+ enum IWCH_QP_FLAGS flags;
+ struct timer_list timer;
+};
+
+static inline int qp_quiesced(struct iwch_qp *qhp)
+{
+ return qhp->flags & QP_QUIESCED;
+}
+
+static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct iwch_qp, ibqp);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp);
+void iwch_qp_rem_ref(struct ib_qp *qp);
+struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn);
+
+struct iwch_ucontext {
+ struct ib_ucontext ibucontext;
+ struct cxio_ucontext uctx;
+ u32 key;
+ spinlock_t mmap_lock;
+ struct list_head mmaps;
+};
+
+static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
+{
+ return container_of(c, struct iwch_ucontext, ibucontext);
+}
+
+struct iwch_mm_entry {
+ struct list_head entry;
+ u64 addr;
+ u32 key;
+ unsigned len;
+};
+
+static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
+ u32 key, unsigned len)
+{
+ struct list_head *pos, *nxt;
+ struct iwch_mm_entry *mm;
+
+ spin_lock(&ucontext->mmap_lock);
+ list_for_each_safe(pos, nxt, &ucontext->mmaps) {
+
+ mm = list_entry(pos, struct iwch_mm_entry, entry);
+ if (mm->key == key && mm->len == len) {
+ list_del_init(&mm->entry);
+ spin_unlock(&ucontext->mmap_lock);
+ PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
+ key, (unsigned long long) mm->addr, mm->len);
+ return mm;
+ }
+ }
+ spin_unlock(&ucontext->mmap_lock);
+ return NULL;
+}
+
+static inline void insert_mmap(struct iwch_ucontext *ucontext,
+ struct iwch_mm_entry *mm)
+{
+ spin_lock(&ucontext->mmap_lock);
+ PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
+ mm->key, (unsigned long long) mm->addr, mm->len);
+ list_add_tail(&mm->entry, &ucontext->mmaps);
+ spin_unlock(&ucontext->mmap_lock);
+}
+
+enum iwch_qp_attr_mask {
+ IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
+ IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
+ IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
+ IWCH_QP_ATTR_MAX_ORD = 1 << 11,
+ IWCH_QP_ATTR_MAX_IRD = 1 << 12,
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
+ IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
+ IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
+ IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
+ IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+ IWCH_QP_ATTR_MAX_ORD |
+ IWCH_QP_ATTR_MAX_IRD |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ IWCH_QP_ATTR_STREAM_MSG_BUFFER |
+ IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
+};
+
+int iwch_modify_qp(struct iwch_dev *rhp,
+ struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs,
+ int internal);
+
+enum iwch_qp_state {
+ IWCH_QP_STATE_IDLE,
+ IWCH_QP_STATE_RTS,
+ IWCH_QP_STATE_ERROR,
+ IWCH_QP_STATE_TERMINATE,
+ IWCH_QP_STATE_CLOSING,
+ IWCH_QP_STATE_TOT
+};
+
+static inline int iwch_convert_state(enum ib_qp_state ib_state)
+{
+ switch (ib_state) {
+ case IB_QPS_RESET:
+ case IB_QPS_INIT:
+ return IWCH_QP_STATE_IDLE;
+ case IB_QPS_RTS:
+ return IWCH_QP_STATE_RTS;
+ case IB_QPS_SQD:
+ return IWCH_QP_STATE_CLOSING;
+ case IB_QPS_SQE:
+ return IWCH_QP_STATE_TERMINATE;
+ case IB_QPS_ERR:
+ return IWCH_QP_STATE_ERROR;
+ default:
+ return -1;
+ }
+}
+
+enum iwch_mem_perms {
+ IWCH_MEM_ACCESS_LOCAL_READ = 1 << 0,
+ IWCH_MEM_ACCESS_LOCAL_WRITE = 1 << 1,
+ IWCH_MEM_ACCESS_REMOTE_READ = 1 << 2,
+ IWCH_MEM_ACCESS_REMOTE_WRITE = 1 << 3,
+ IWCH_MEM_ACCESS_ATOMICS = 1 << 4,
+ IWCH_MEM_ACCESS_BINDING = 1 << 5,
+ IWCH_MEM_ACCESS_LOCAL =
+ (IWCH_MEM_ACCESS_LOCAL_READ | IWCH_MEM_ACCESS_LOCAL_WRITE),
+ IWCH_MEM_ACCESS_REMOTE =
+ (IWCH_MEM_ACCESS_REMOTE_WRITE | IWCH_MEM_ACCESS_REMOTE_READ)
+ /* cannot go beyond 1 << 31 */
+} __attribute__ ((packed));
+
+static inline u32 iwch_convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? IWCH_MEM_ACCESS_REMOTE_WRITE : 0)
+ | (acc & IB_ACCESS_REMOTE_READ ? IWCH_MEM_ACCESS_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? IWCH_MEM_ACCESS_LOCAL_WRITE : 0) |
+ (acc & IB_ACCESS_MW_BIND ? IWCH_MEM_ACCESS_BINDING : 0) |
+ IWCH_MEM_ACCESS_LOCAL_READ;
+}
+
+enum iwch_mmid_state {
+ IWCH_STAG_STATE_VALID,
+ IWCH_STAG_STATE_INVALID
+};
+
+enum iwch_qp_query_flags {
+ IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */
+ IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */
+ IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */
+
+ /*
+ * Quiesce QP context; Consumer
+ * will NOT replay outstanding WR
+ */
+ IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
+ IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
+ IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
+};
+
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int iwch_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_register_device(struct iwch_dev *dev);
+void iwch_unregister_device(struct iwch_dev *dev);
+int iwch_quiesce_qps(struct iwch_cq *chp);
+int iwch_resume_qps(struct iwch_cq *chp);
+void stop_read_rep_timer(struct iwch_qp *qhp);
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list);
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list,
+ int npages);
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *total_size,
+ int *npages,
+ int *shift,
+ __be64 **page_list);
+
+
+#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
new file mode 100644
index 000000000000..4dda2f6da2de
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -0,0 +1,1008 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+#include "cxio_hal.h"
+
+#define NO_SUPPORT -1
+
+static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 * flit_cnt)
+{
+ int i;
+ u32 plen;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.rdmaop = T3_SEND_WITH_SE;
+ else
+ wqe->send.rdmaop = T3_SEND;
+ wqe->send.rem_stag = 0;
+ break;
+#if 0 /* Not currently supported */
+ case TYPE_SEND_INVALIDATE:
+ case TYPE_SEND_INVALIDATE_IMMEDIATE:
+ wqe->send.rdmaop = T3_SEND_WITH_INV;
+ wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+ break;
+ case TYPE_SEND_SE_INVALIDATE:
+ wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+ wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+ break;
+#endif
+ default:
+ break;
+ }
+ if (wr->num_sge > T3_MAX_SGE)
+ return -EINVAL;
+ wqe->send.reserved[0] = 0;
+ wqe->send.reserved[1] = 0;
+ wqe->send.reserved[2] = 0;
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ plen = 4;
+ wqe->send.sgl[0].stag = wr->imm_data;
+ wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
+ wqe->send.num_sgle = __constant_cpu_to_be32(0);
+ *flit_cnt = 5;
+ } else {
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen) {
+ return -EMSGSIZE;
+ }
+ plen += wr->sg_list[i].length;
+ wqe->send.sgl[i].stag =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ wqe->send.sgl[i].len =
+ cpu_to_be32(wr->sg_list[i].length);
+ wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
+ }
+ wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
+ *flit_cnt = 4 + ((wr->num_sge) << 1);
+ }
+ wqe->send.plen = cpu_to_be32(plen);
+ return 0;
+}
+
+static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ int i;
+ u32 plen;
+ if (wr->num_sge > T3_MAX_SGE)
+ return -EINVAL;
+ wqe->write.rdmaop = T3_RDMA_WRITE;
+ wqe->write.reserved[0] = 0;
+ wqe->write.reserved[1] = 0;
+ wqe->write.reserved[2] = 0;
+ wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
+ wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+
+ if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+ plen = 4;
+ wqe->write.sgl[0].stag = wr->imm_data;
+ wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
+ wqe->write.num_sgle = __constant_cpu_to_be32(0);
+ *flit_cnt = 6;
+ } else {
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen) {
+ return -EMSGSIZE;
+ }
+ plen += wr->sg_list[i].length;
+ wqe->write.sgl[i].stag =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ wqe->write.sgl[i].len =
+ cpu_to_be32(wr->sg_list[i].length);
+ wqe->write.sgl[i].to =
+ cpu_to_be64(wr->sg_list[i].addr);
+ }
+ wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
+ *flit_cnt = 5 + ((wr->num_sge) << 1);
+ }
+ wqe->write.plen = cpu_to_be32(plen);
+ return 0;
+}
+
+static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ if (wr->num_sge > 1)
+ return -EINVAL;
+ wqe->read.rdmaop = T3_READ_REQ;
+ wqe->read.reserved[0] = 0;
+ wqe->read.reserved[1] = 0;
+ wqe->read.reserved[2] = 0;
+ wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+ wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
+ wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
+ wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
+ *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+ return 0;
+}
+
+/*
+ * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
+ */
+static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
+ struct ib_sge *sg_list, u32 num_sgle,
+ u32 * pbl_addr, u8 * page_size)
+{
+ int i;
+ struct iwch_mr *mhp;
+ u32 offset;
+ for (i = 0; i < num_sgle; i++) {
+
+ mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
+ if (!mhp) {
+ PDBG("%s %d\n", __FUNCTION__, __LINE__);
+ return -EIO;
+ }
+ if (!mhp->attr.state) {
+ PDBG("%s %d\n", __FUNCTION__, __LINE__);
+ return -EIO;
+ }
+ if (mhp->attr.zbva) {
+ PDBG("%s %d\n", __FUNCTION__, __LINE__);
+ return -EIO;
+ }
+
+ if (sg_list[i].addr < mhp->attr.va_fbo) {
+ PDBG("%s %d\n", __FUNCTION__, __LINE__);
+ return -EINVAL;
+ }
+ if (sg_list[i].addr + ((u64) sg_list[i].length) <
+ sg_list[i].addr) {
+ PDBG("%s %d\n", __FUNCTION__, __LINE__);
+ return -EINVAL;
+ }
+ if (sg_list[i].addr + ((u64) sg_list[i].length) >
+ mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
+ PDBG("%s %d\n", __FUNCTION__, __LINE__);
+ return -EINVAL;
+ }
+ offset = sg_list[i].addr - mhp->attr.va_fbo;
+ offset += ((u32) mhp->attr.va_fbo) %
+ (1UL << (12 + mhp->attr.page_size));
+ pbl_addr[i] = ((mhp->attr.pbl_addr -
+ rhp->rdev.rnic_info.pbl_base) >> 3) +
+ (offset >> (12 + mhp->attr.page_size));
+ page_size[i] = mhp->attr.page_size;
+ }
+ return 0;
+}
+
+static inline int iwch_build_rdma_recv(struct iwch_dev *rhp,
+ union t3_wr *wqe,
+ struct ib_recv_wr *wr)
+{
+ int i, err = 0;
+ u32 pbl_addr[4];
+ u8 page_size[4];
+ if (wr->num_sge > T3_MAX_SGE)
+ return -EINVAL;
+ err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr,
+ page_size);
+ if (err)
+ return err;
+ wqe->recv.pagesz[0] = page_size[0];
+ wqe->recv.pagesz[1] = page_size[1];
+ wqe->recv.pagesz[2] = page_size[2];
+ wqe->recv.pagesz[3] = page_size[3];
+ wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
+ wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+
+ /* to in the WQE == the offset into the page */
+ wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
+ (1UL << (12 + page_size[i])));
+
+ /* pbl_addr is the adapters address in the PBL */
+ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
+ }
+ for (; i < T3_MAX_SGE; i++) {
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = 0;
+ wqe->recv.sgl[i].to = 0;
+ wqe->recv.pbl_addr[i] = 0;
+ }
+ return 0;
+}
+
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ int err = 0;
+ u8 t3_wr_flit_cnt;
+ enum t3_wr_opcode t3_wr_opcode = 0;
+ enum t3_wr_flags t3_wr_flags;
+ struct iwch_qp *qhp;
+ u32 idx;
+ union t3_wr *wqe;
+ u32 num_wrs;
+ unsigned long flag;
+ struct t3_swsq *sqp;
+
+ qhp = to_iwch_qp(ibqp);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+ qhp->wq.sq_size_log2);
+ if (num_wrs <= 0) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -ENOMEM;
+ }
+ while (wr) {
+ if (num_wrs == 0) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+ t3_wr_flags = 0;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
+ if (wr->send_flags & IB_SEND_FENCE)
+ t3_wr_flags |= T3_READ_FENCE_FLAG;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ t3_wr_flags |= T3_COMPLETION_FLAG;
+ sqp = qhp->wq.sq +
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ t3_wr_opcode = T3_WR_SEND;
+ err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ t3_wr_opcode = T3_WR_WRITE;
+ err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+ break;
+ case IB_WR_RDMA_READ:
+ t3_wr_opcode = T3_WR_READ;
+ t3_wr_flags = 0; /* T3 reads are always signaled */
+ err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+ if (err)
+ break;
+ sqp->read_len = wqe->read.local_len;
+ if (!qhp->wq.oldest_read)
+ qhp->wq.oldest_read = sqp;
+ break;
+ default:
+ PDBG("%s post of type=%d TBD!\n", __FUNCTION__,
+ wr->opcode);
+ err = -EINVAL;
+ }
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+ sqp->wr_id = wr->wr_id;
+ sqp->opcode = wr2opcode(t3_wr_opcode);
+ sqp->sq_wptr = qhp->wq.sq_wptr;
+ sqp->complete = 0;
+ sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+
+ build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+ 0, t3_wr_flit_cnt);
+ PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
+ __FUNCTION__, (unsigned long long) wr->wr_id, idx,
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
+ sqp->opcode);
+ wr = wr->next;
+ num_wrs--;
+ ++(qhp->wq.wptr);
+ ++(qhp->wq.sq_wptr);
+ }
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ return err;
+}
+
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int err = 0;
+ struct iwch_qp *qhp;
+ u32 idx;
+ union t3_wr *wqe;
+ u32 num_wrs;
+ unsigned long flag;
+
+ qhp = to_iwch_qp(ibqp);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2) - 1;
+ if (!wr) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ while (wr) {
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+ if (num_wrs)
+ err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
+ else
+ err = -ENOMEM;
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
+ wr->wr_id;
+ build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+ 0, sizeof(struct t3_receive_wr) >> 3);
+ PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
+ "wqe %p \n", __FUNCTION__, (unsigned long long) wr->wr_id,
+ idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+ ++(qhp->wq.rq_wptr);
+ ++(qhp->wq.wptr);
+ wr = wr->next;
+ num_wrs--;
+ }
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ return err;
+}
+
+int iwch_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mw *mhp;
+ struct iwch_qp *qhp;
+ union t3_wr *wqe;
+ u32 pbl_addr;
+ u8 page_size;
+ u32 num_wrs;
+ unsigned long flag;
+ struct ib_sge sgl;
+ int err=0;
+ enum t3_wr_flags t3_wr_flags;
+ u32 idx;
+ struct t3_swsq *sqp;
+
+ qhp = to_iwch_qp(qp);
+ mhp = to_iwch_mw(mw);
+ rhp = qhp->rhp;
+
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+ qhp->wq.sq_size_log2);
+ if ((num_wrs) <= 0) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -ENOMEM;
+ }
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __FUNCTION__, idx,
+ mw, mw_bind);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+
+ t3_wr_flags = 0;
+ if (mw_bind->send_flags & IB_SEND_SIGNALED)
+ t3_wr_flags = T3_COMPLETION_FLAG;
+
+ sgl.addr = mw_bind->addr;
+ sgl.lkey = mw_bind->mr->lkey;
+ sgl.length = mw_bind->length;
+ wqe->bind.reserved = 0;
+ wqe->bind.type = T3_VA_BASED_TO;
+
+ /* TBD: check perms */
+ wqe->bind.perms = iwch_convert_access(mw_bind->mw_access_flags);
+ wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
+ wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
+ wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
+ wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
+ err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
+ if (err) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return err;
+ }
+ wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+ sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+ sqp->wr_id = mw_bind->wr_id;
+ sqp->opcode = T3_BIND_MW;
+ sqp->sq_wptr = qhp->wq.sq_wptr;
+ sqp->complete = 0;
+ sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
+ wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
+ wqe->bind.mr_pagesz = page_size;
+ wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
+ build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
+ sizeof(struct t3_bind_mw_wr) >> 3);
+ ++(qhp->wq.wptr);
+ ++(qhp->wq.sq_wptr);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+ return err;
+}
+
+static inline void build_term_codes(int t3err, u8 *layer_type, u8 *ecode,
+ int tagged)
+{
+ switch (t3err) {
+ case TPT_ERR_STAG:
+ if (tagged == 1) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_INV_STAG;
+ } else if (tagged == 2) {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_INV_STAG;
+ }
+ break;
+ case TPT_ERR_PDID:
+ case TPT_ERR_QPID:
+ case TPT_ERR_ACCESS:
+ if (tagged == 1) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_STAG_NOT_ASSOC;
+ } else if (tagged == 2) {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ }
+ break;
+ case TPT_ERR_WRAP:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_TO_WRAP;
+ break;
+ case TPT_ERR_BOUND:
+ if (tagged == 1) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ } else if (tagged == 2) {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_BASE_BOUNDS;
+ } else {
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_MSG_TOOBIG;
+ }
+ break;
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ break;
+ case TPT_ERR_ECC:
+ case TPT_ERR_ECC_PSTAG:
+ case TPT_ERR_INTERNAL_ERR:
+ *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case TPT_ERR_OUT_OF_RQE:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_NOBUF;
+ break;
+ case TPT_ERR_PBL_ADDR_BOUND:
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ break;
+ case TPT_ERR_CRC:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_CRC_ERR;
+ break;
+ case TPT_ERR_MARKER:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_MARKER_ERR;
+ break;
+ case TPT_ERR_PDU_LEN_ERR:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_MSG_TOOBIG;
+ break;
+ case TPT_ERR_DDP_VERSION:
+ if (tagged) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_INV_VERS;
+ } else {
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_VERS;
+ }
+ break;
+ case TPT_ERR_RDMA_VERSION:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_VERS;
+ break;
+ case TPT_ERR_OPCODE:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_OPCODE;
+ break;
+ case TPT_ERR_DDP_QUEUE_NUM:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_QN;
+ break;
+ case TPT_ERR_MSN:
+ case TPT_ERR_MSN_GAP:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_IRD_OVERFLOW:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_RANGE;
+ break;
+ case TPT_ERR_TBIT:
+ *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case TPT_ERR_MO:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MO;
+ break;
+ default:
+ *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ }
+}
+
+/*
+ * This posts a TERMINATE with layer=RDMA, type=catastrophic.
+ */
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
+{
+ union t3_wr *wqe;
+ struct terminate_message *term;
+ int status;
+ int tagged = 0;
+ struct sk_buff *skb;
+
+ PDBG("%s %d\n", __FUNCTION__, __LINE__);
+ skb = alloc_skb(40, GFP_ATOMIC);
+ if (!skb) {
+ printk(KERN_ERR "%s cannot send TERMINATE!\n", __FUNCTION__);
+ return -ENOMEM;
+ }
+ wqe = (union t3_wr *)skb_put(skb, 40);
+ memset(wqe, 0, 40);
+ wqe->send.rdmaop = T3_TERMINATE;
+
+ /* immediate data length */
+ wqe->send.plen = htonl(4);
+
+ /* immediate data starts here. */
+ term = (struct terminate_message *)wqe->send.sgl;
+ if (rsp_msg) {
+ status = CQE_STATUS(rsp_msg->cqe);
+ if (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)
+ tagged = 1;
+ if ((CQE_OPCODE(rsp_msg->cqe) == T3_READ_REQ) ||
+ (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP))
+ tagged = 2;
+ } else {
+ status = TPT_ERR_INTERNAL_ERR;
+ }
+ build_term_codes(status, &term->layer_etype, &term->ecode, tagged);
+ build_fw_riwrh((void *)wqe, T3_WR_SEND,
+ T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1,
+ qhp->ep->hwtid, 5);
+ skb->priority = CPL_PRIORITY_DATA;
+ return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
+/*
+ * Assumes qhp lock is held.
+ */
+static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+{
+ struct iwch_cq *rchp, *schp;
+ int count;
+
+ rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+ schp = get_chp(qhp->rhp, qhp->attr.scq);
+
+ PDBG("%s qhp %p rchp %p schp %p\n", __FUNCTION__, qhp, rchp, schp);
+ /* take a ref on the qhp since we must release the lock */
+ atomic_inc(&qhp->refcnt);
+ spin_unlock_irqrestore(&qhp->lock, *flag);
+
+ /* locking heirarchy: cq lock first, then qp lock. */
+ spin_lock_irqsave(&rchp->lock, *flag);
+ spin_lock(&qhp->lock);
+ cxio_flush_hw_cq(&rchp->cq);
+ cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
+ cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&rchp->lock, *flag);
+
+ /* locking heirarchy: cq lock first, then qp lock. */
+ spin_lock_irqsave(&schp->lock, *flag);
+ spin_lock(&qhp->lock);
+ cxio_flush_hw_cq(&schp->cq);
+ cxio_count_scqes(&schp->cq, &qhp->wq, &count);
+ cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&schp->lock, *flag);
+
+ /* deref */
+ if (atomic_dec_and_test(&qhp->refcnt))
+ wake_up(&qhp->wait);
+
+ spin_lock_irqsave(&qhp->lock, *flag);
+}
+
+static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+{
+ if (t3b_device(qhp->rhp))
+ cxio_set_wq_in_error(&qhp->wq);
+ else
+ __flush_qp(qhp, flag);
+}
+
+
+/*
+ * Return non zero if at least one RECV was pre-posted.
+ */
+static inline int rqes_posted(struct iwch_qp *qhp)
+{
+ return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+}
+
+static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs)
+{
+ struct t3_rdma_init_attr init_attr;
+ int ret;
+
+ init_attr.tid = qhp->ep->hwtid;
+ init_attr.qpid = qhp->wq.qpid;
+ init_attr.pdid = qhp->attr.pd;
+ init_attr.scqid = qhp->attr.scq;
+ init_attr.rcqid = qhp->attr.rcq;
+ init_attr.rq_addr = qhp->wq.rq_addr;
+ init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
+ init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
+ qhp->attr.mpa_attr.recv_marker_enabled |
+ (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
+ (qhp->attr.mpa_attr.crc_enabled << 2);
+
+ /*
+ * XXX - The IWCM doesn't quite handle getting these
+ * attrs set before going into RTS. For now, just turn
+ * them on always...
+ */
+#if 0
+ init_attr.qpcaps = qhp->attr.enableRdmaRead |
+ (qhp->attr.enableRdmaWrite << 1) |
+ (qhp->attr.enableBind << 2) |
+ (qhp->attr.enable_stag0_fastreg << 3) |
+ (qhp->attr.enable_stag0_fastreg << 4);
+#else
+ init_attr.qpcaps = 0x1f;
+#endif
+ init_attr.tcp_emss = qhp->ep->emss;
+ init_attr.ord = qhp->attr.max_ord;
+ init_attr.ird = qhp->attr.max_ird;
+ init_attr.qp_dma_addr = qhp->wq.dma_addr;
+ init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
+ init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+ PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
+ "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
+ init_attr.rq_addr, init_attr.rq_size,
+ init_attr.flags, init_attr.qpcaps);
+ ret = cxio_rdma_init(&rhp->rdev, &init_attr);
+ PDBG("%s ret %d\n", __FUNCTION__, ret);
+ return ret;
+}
+
+int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs,
+ int internal)
+{
+ int ret = 0;
+ struct iwch_qp_attributes newattr = qhp->attr;
+ unsigned long flag;
+ int disconnect = 0;
+ int terminate = 0;
+ int abort = 0;
+ int free = 0;
+ struct iwch_ep *ep = NULL;
+
+ PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __FUNCTION__,
+ qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
+ (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+
+ spin_lock_irqsave(&qhp->lock, flag);
+
+ /* Process attr changes if in IDLE */
+ if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
+ if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
+ ret = -EIO;
+ goto out;
+ }
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
+ newattr.enable_rdma_read = attrs->enable_rdma_read;
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
+ newattr.enable_rdma_write = attrs->enable_rdma_write;
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
+ newattr.enable_bind = attrs->enable_bind;
+ if (mask & IWCH_QP_ATTR_MAX_ORD) {
+ if (attrs->max_ord >
+ rhp->attr.max_rdma_read_qp_depth) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ord = attrs->max_ord;
+ }
+ if (mask & IWCH_QP_ATTR_MAX_IRD) {
+ if (attrs->max_ird >
+ rhp->attr.max_rdma_reads_per_qp) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ird = attrs->max_ird;
+ }
+ qhp->attr = newattr;
+ }
+
+ if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
+ goto out;
+ if (qhp->attr.state == attrs->next_state)
+ goto out;
+
+ switch (qhp->attr.state) {
+ case IWCH_QP_STATE_IDLE:
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_RTS:
+ if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.mpa_attr = attrs->mpa_attr;
+ qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
+ qhp->ep = qhp->attr.llp_stream_handle;
+ qhp->attr.state = IWCH_QP_STATE_RTS;
+
+ /*
+ * Ref the endpoint here and deref when we
+ * disassociate the endpoint from the QP. This
+ * happens in CLOSING->IDLE transition or *->ERROR
+ * transition.
+ */
+ get_ep(&qhp->ep->com);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ret = rdma_init(rhp, qhp, mask, attrs);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (ret)
+ goto err;
+ break;
+ case IWCH_QP_STATE_ERROR:
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ flush_qp(qhp, &flag);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case IWCH_QP_STATE_RTS:
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_CLOSING:
+ BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ qhp->attr.state = IWCH_QP_STATE_CLOSING;
+ if (!internal) {
+ abort=0;
+ disconnect = 1;
+ ep = qhp->ep;
+ }
+ break;
+ case IWCH_QP_STATE_TERMINATE:
+ qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+ if (t3b_device(qhp->rhp))
+ cxio_set_wq_in_error(&qhp->wq);
+ if (!internal)
+ terminate = 1;
+ break;
+ case IWCH_QP_STATE_ERROR:
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ if (!internal) {
+ abort=1;
+ disconnect = 1;
+ ep = qhp->ep;
+ }
+ goto err;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case IWCH_QP_STATE_CLOSING:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_IDLE:
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ qhp->attr.llp_stream_handle = NULL;
+ put_ep(&qhp->ep->com);
+ qhp->ep = NULL;
+ wake_up(&qhp->wait);
+ break;
+ case IWCH_QP_STATE_ERROR:
+ goto err;
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+ break;
+ case IWCH_QP_STATE_ERROR:
+ if (attrs->next_state != IWCH_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
+ !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ memset(&qhp->attr, 0, sizeof(qhp->attr));
+ break;
+ case IWCH_QP_STATE_TERMINATE:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto err;
+ break;
+ default:
+ printk(KERN_ERR "%s in a bad state %d\n",
+ __FUNCTION__, qhp->attr.state);
+ ret = -EINVAL;
+ goto err;
+ break;
+ }
+ goto out;
+err:
+ PDBG("%s disassociating ep %p qpid 0x%x\n", __FUNCTION__, qhp->ep,
+ qhp->wq.qpid);
+
+ /* disassociate the LLP connection */
+ qhp->attr.llp_stream_handle = NULL;
+ ep = qhp->ep;
+ qhp->ep = NULL;
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ free=1;
+ wake_up(&qhp->wait);
+ BUG_ON(!ep);
+ flush_qp(qhp, &flag);
+out:
+ spin_unlock_irqrestore(&qhp->lock, flag);
+
+ if (terminate)
+ iwch_post_terminate(qhp, NULL);
+
+ /*
+ * If disconnect is 1, then we need to initiate a disconnect
+ * on the EP. This can be a normal close (RTS->CLOSING) or
+ * an abnormal close (RTS/CLOSING->ERROR).
+ */
+ if (disconnect)
+ iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+
+ /*
+ * If free is 1, then we've disassociated the EP from the QP
+ * and we need to dereference the EP.
+ */
+ if (free)
+ put_ep(&ep->com);
+
+ PDBG("%s exit state %d\n", __FUNCTION__, qhp->attr.state);
+ return ret;
+}
+
+static int quiesce_qp(struct iwch_qp *qhp)
+{
+ spin_lock_irq(&qhp->lock);
+ iwch_quiesce_tid(qhp->ep);
+ qhp->flags |= QP_QUIESCED;
+ spin_unlock_irq(&qhp->lock);
+ return 0;
+}
+
+static int resume_qp(struct iwch_qp *qhp)
+{
+ spin_lock_irq(&qhp->lock);
+ iwch_resume_tid(qhp->ep);
+ qhp->flags &= ~QP_QUIESCED;
+ spin_unlock_irq(&qhp->lock);
+ return 0;
+}
+
+int iwch_quiesce_qps(struct iwch_cq *chp)
+{
+ int i;
+ struct iwch_qp *qhp;
+
+ for (i=0; i < T3_MAX_NUM_QP; i++) {
+ qhp = get_qhp(chp->rhp, i);
+ if (!qhp)
+ continue;
+ if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
+ quiesce_qp(qhp);
+ continue;
+ }
+ if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
+ quiesce_qp(qhp);
+ }
+ return 0;
+}
+
+int iwch_resume_qps(struct iwch_cq *chp)
+{
+ int i;
+ struct iwch_qp *qhp;
+
+ for (i=0; i < T3_MAX_NUM_QP; i++) {
+ qhp = get_qhp(chp->rhp, i);
+ if (!qhp)
+ continue;
+ if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
+ resume_qp(qhp);
+ continue;
+ }
+ if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
+ resume_qp(qhp);
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
new file mode 100644
index 000000000000..cb7086f558c1
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_USER_H__
+#define __IWCH_USER_H__
+
+#define IWCH_UVERBS_ABI_VERSION 1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct iwch_create_cq_req {
+ __u64 user_rptr_addr;
+};
+
+struct iwch_create_cq_resp {
+ __u64 key;
+ __u32 cqid;
+ __u32 size_log2;
+};
+
+struct iwch_create_qp_resp {
+ __u64 key;
+ __u64 db_key;
+ __u32 qpid;
+ __u32 size_log2;
+ __u32 sq_size_log2;
+ __u32 rq_size_log2;
+};
+
+struct iwch_reg_user_mr_resp {
+ __u32 pbl_addr;
+};
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h
new file mode 100644
index 000000000000..c702dc199e18
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/tcb.h
@@ -0,0 +1,632 @@
+/*
+ * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _TCB_DEFS_H
+#define _TCB_DEFS_H
+
+#define W_TCB_T_STATE 0
+#define S_TCB_T_STATE 0
+#define M_TCB_T_STATE 0xfULL
+#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
+
+#define W_TCB_TIMER 0
+#define S_TCB_TIMER 4
+#define M_TCB_TIMER 0x1ULL
+#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
+
+#define W_TCB_DACK_TIMER 0
+#define S_TCB_DACK_TIMER 5
+#define M_TCB_DACK_TIMER 0x1ULL
+#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
+
+#define W_TCB_DEL_FLAG 0
+#define S_TCB_DEL_FLAG 6
+#define M_TCB_DEL_FLAG 0x1ULL
+#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
+
+#define W_TCB_L2T_IX 0
+#define S_TCB_L2T_IX 7
+#define M_TCB_L2T_IX 0x7ffULL
+#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
+
+#define W_TCB_SMAC_SEL 0
+#define S_TCB_SMAC_SEL 18
+#define M_TCB_SMAC_SEL 0x3ULL
+#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
+
+#define W_TCB_TOS 0
+#define S_TCB_TOS 20
+#define M_TCB_TOS 0x3fULL
+#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
+
+#define W_TCB_MAX_RT 0
+#define S_TCB_MAX_RT 26
+#define M_TCB_MAX_RT 0xfULL
+#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
+
+#define W_TCB_T_RXTSHIFT 0
+#define S_TCB_T_RXTSHIFT 30
+#define M_TCB_T_RXTSHIFT 0xfULL
+#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
+
+#define W_TCB_T_DUPACKS 1
+#define S_TCB_T_DUPACKS 2
+#define M_TCB_T_DUPACKS 0xfULL
+#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
+
+#define W_TCB_T_MAXSEG 1
+#define S_TCB_T_MAXSEG 6
+#define M_TCB_T_MAXSEG 0xfULL
+#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
+
+#define W_TCB_T_FLAGS1 1
+#define S_TCB_T_FLAGS1 10
+#define M_TCB_T_FLAGS1 0xffffffffULL
+#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
+
+#define W_TCB_T_MIGRATION 1
+#define S_TCB_T_MIGRATION 20
+#define M_TCB_T_MIGRATION 0x1ULL
+#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
+
+#define W_TCB_T_FLAGS2 2
+#define S_TCB_T_FLAGS2 10
+#define M_TCB_T_FLAGS2 0x7fULL
+#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
+
+#define W_TCB_SND_SCALE 2
+#define S_TCB_SND_SCALE 17
+#define M_TCB_SND_SCALE 0xfULL
+#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
+
+#define W_TCB_RCV_SCALE 2
+#define S_TCB_RCV_SCALE 21
+#define M_TCB_RCV_SCALE 0xfULL
+#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
+
+#define W_TCB_SND_UNA_RAW 2
+#define S_TCB_SND_UNA_RAW 25
+#define M_TCB_SND_UNA_RAW 0x7ffffffULL
+#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
+
+#define W_TCB_SND_NXT_RAW 3
+#define S_TCB_SND_NXT_RAW 20
+#define M_TCB_SND_NXT_RAW 0x7ffffffULL
+#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
+
+#define W_TCB_RCV_NXT 4
+#define S_TCB_RCV_NXT 15
+#define M_TCB_RCV_NXT 0xffffffffULL
+#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
+
+#define W_TCB_RCV_ADV 5
+#define S_TCB_RCV_ADV 15
+#define M_TCB_RCV_ADV 0xffffULL
+#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
+
+#define W_TCB_SND_MAX_RAW 5
+#define S_TCB_SND_MAX_RAW 31
+#define M_TCB_SND_MAX_RAW 0x7ffffffULL
+#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
+
+#define W_TCB_SND_CWND 6
+#define S_TCB_SND_CWND 26
+#define M_TCB_SND_CWND 0x7ffffffULL
+#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
+
+#define W_TCB_SND_SSTHRESH 7
+#define S_TCB_SND_SSTHRESH 21
+#define M_TCB_SND_SSTHRESH 0x7ffffffULL
+#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
+
+#define W_TCB_T_RTT_TS_RECENT_AGE 8
+#define S_TCB_T_RTT_TS_RECENT_AGE 16
+#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL
+#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
+
+#define W_TCB_T_RTSEQ_RECENT 9
+#define S_TCB_T_RTSEQ_RECENT 16
+#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL
+#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
+
+#define W_TCB_T_SRTT 10
+#define S_TCB_T_SRTT 16
+#define M_TCB_T_SRTT 0xffffULL
+#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
+
+#define W_TCB_T_RTTVAR 11
+#define S_TCB_T_RTTVAR 0
+#define M_TCB_T_RTTVAR 0xffffULL
+#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
+
+#define W_TCB_TS_LAST_ACK_SENT_RAW 11
+#define S_TCB_TS_LAST_ACK_SENT_RAW 16
+#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL
+#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
+
+#define W_TCB_DIP 12
+#define S_TCB_DIP 11
+#define M_TCB_DIP 0xffffffffULL
+#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
+
+#define W_TCB_SIP 13
+#define S_TCB_SIP 11
+#define M_TCB_SIP 0xffffffffULL
+#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
+
+#define W_TCB_DP 14
+#define S_TCB_DP 11
+#define M_TCB_DP 0xffffULL
+#define V_TCB_DP(x) ((x) << S_TCB_DP)
+
+#define W_TCB_SP 14
+#define S_TCB_SP 27
+#define M_TCB_SP 0xffffULL
+#define V_TCB_SP(x) ((x) << S_TCB_SP)
+
+#define W_TCB_TIMESTAMP 15
+#define S_TCB_TIMESTAMP 11
+#define M_TCB_TIMESTAMP 0xffffffffULL
+#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
+
+#define W_TCB_TIMESTAMP_OFFSET 16
+#define S_TCB_TIMESTAMP_OFFSET 11
+#define M_TCB_TIMESTAMP_OFFSET 0xfULL
+#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
+
+#define W_TCB_TX_MAX 16
+#define S_TCB_TX_MAX 15
+#define M_TCB_TX_MAX 0xffffffffULL
+#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
+
+#define W_TCB_TX_HDR_PTR_RAW 17
+#define S_TCB_TX_HDR_PTR_RAW 15
+#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL
+#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
+
+#define W_TCB_TX_LAST_PTR_RAW 18
+#define S_TCB_TX_LAST_PTR_RAW 0
+#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL
+#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
+
+#define W_TCB_TX_COMPACT 18
+#define S_TCB_TX_COMPACT 17
+#define M_TCB_TX_COMPACT 0x1ULL
+#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
+
+#define W_TCB_RX_COMPACT 18
+#define S_TCB_RX_COMPACT 18
+#define M_TCB_RX_COMPACT 0x1ULL
+#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
+
+#define W_TCB_RCV_WND 18
+#define S_TCB_RCV_WND 19
+#define M_TCB_RCV_WND 0x7ffffffULL
+#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
+
+#define W_TCB_RX_HDR_OFFSET 19
+#define S_TCB_RX_HDR_OFFSET 14
+#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL
+#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
+
+#define W_TCB_RX_FRAG0_START_IDX_RAW 20
+#define S_TCB_RX_FRAG0_START_IDX_RAW 9
+#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
+
+#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21
+#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4
+#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL
+#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
+
+#define W_TCB_RX_FRAG0_LEN 21
+#define S_TCB_RX_FRAG0_LEN 31
+#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL
+#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
+
+#define W_TCB_RX_FRAG1_LEN 22
+#define S_TCB_RX_FRAG1_LEN 26
+#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL
+#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
+
+#define W_TCB_NEWRENO_RECOVER 23
+#define S_TCB_NEWRENO_RECOVER 21
+#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL
+#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
+
+#define W_TCB_PDU_HAVE_LEN 24
+#define S_TCB_PDU_HAVE_LEN 16
+#define M_TCB_PDU_HAVE_LEN 0x1ULL
+#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
+
+#define W_TCB_PDU_LEN 24
+#define S_TCB_PDU_LEN 17
+#define M_TCB_PDU_LEN 0xffffULL
+#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
+
+#define W_TCB_RX_QUIESCE 25
+#define S_TCB_RX_QUIESCE 1
+#define M_TCB_RX_QUIESCE 0x1ULL
+#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
+
+#define W_TCB_RX_PTR_RAW 25
+#define S_TCB_RX_PTR_RAW 2
+#define M_TCB_RX_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
+
+#define W_TCB_CPU_NO 25
+#define S_TCB_CPU_NO 19
+#define M_TCB_CPU_NO 0x7fULL
+#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
+
+#define W_TCB_ULP_TYPE 25
+#define S_TCB_ULP_TYPE 26
+#define M_TCB_ULP_TYPE 0xfULL
+#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
+
+#define W_TCB_RX_FRAG1_PTR_RAW 25
+#define S_TCB_RX_FRAG1_PTR_RAW 30
+#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
+
+#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26
+#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15
+#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
+
+#define W_TCB_RX_FRAG2_PTR_RAW 27
+#define S_TCB_RX_FRAG2_PTR_RAW 10
+#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
+
+#define W_TCB_RX_FRAG2_LEN_RAW 27
+#define S_TCB_RX_FRAG2_LEN_RAW 27
+#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
+
+#define W_TCB_RX_FRAG3_PTR_RAW 28
+#define S_TCB_RX_FRAG3_PTR_RAW 22
+#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
+
+#define W_TCB_RX_FRAG3_LEN_RAW 29
+#define S_TCB_RX_FRAG3_LEN_RAW 7
+#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
+
+#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30
+#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2
+#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
+
+#define W_TCB_PDU_HDR_LEN 30
+#define S_TCB_PDU_HDR_LEN 29
+#define M_TCB_PDU_HDR_LEN 0xffULL
+#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
+
+#define W_TCB_SLUSH1 31
+#define S_TCB_SLUSH1 5
+#define M_TCB_SLUSH1 0x7ffffULL
+#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
+
+#define W_TCB_ULP_RAW 31
+#define S_TCB_ULP_RAW 24
+#define M_TCB_ULP_RAW 0xffULL
+#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
+
+#define W_TCB_DDP_RDMAP_VERSION 25
+#define S_TCB_DDP_RDMAP_VERSION 30
+#define M_TCB_DDP_RDMAP_VERSION 0x1ULL
+#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
+
+#define W_TCB_MARKER_ENABLE_RX 25
+#define S_TCB_MARKER_ENABLE_RX 31
+#define M_TCB_MARKER_ENABLE_RX 0x1ULL
+#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
+
+#define W_TCB_MARKER_ENABLE_TX 26
+#define S_TCB_MARKER_ENABLE_TX 0
+#define M_TCB_MARKER_ENABLE_TX 0x1ULL
+#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
+
+#define W_TCB_CRC_ENABLE 26
+#define S_TCB_CRC_ENABLE 1
+#define M_TCB_CRC_ENABLE 0x1ULL
+#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
+
+#define W_TCB_IRS_ULP 26
+#define S_TCB_IRS_ULP 2
+#define M_TCB_IRS_ULP 0x1ffULL
+#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
+
+#define W_TCB_ISS_ULP 26
+#define S_TCB_ISS_ULP 11
+#define M_TCB_ISS_ULP 0x1ffULL
+#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
+
+#define W_TCB_TX_PDU_LEN 26
+#define S_TCB_TX_PDU_LEN 20
+#define M_TCB_TX_PDU_LEN 0x3fffULL
+#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
+
+#define W_TCB_TX_PDU_OUT 27
+#define S_TCB_TX_PDU_OUT 2
+#define M_TCB_TX_PDU_OUT 0x1ULL
+#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
+
+#define W_TCB_CQ_IDX_SQ 27
+#define S_TCB_CQ_IDX_SQ 3
+#define M_TCB_CQ_IDX_SQ 0xffffULL
+#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
+
+#define W_TCB_CQ_IDX_RQ 27
+#define S_TCB_CQ_IDX_RQ 19
+#define M_TCB_CQ_IDX_RQ 0xffffULL
+#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
+
+#define W_TCB_QP_ID 28
+#define S_TCB_QP_ID 3
+#define M_TCB_QP_ID 0xffffULL
+#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
+
+#define W_TCB_PD_ID 28
+#define S_TCB_PD_ID 19
+#define M_TCB_PD_ID 0xffffULL
+#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
+
+#define W_TCB_STAG 29
+#define S_TCB_STAG 3
+#define M_TCB_STAG 0xffffffffULL
+#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
+
+#define W_TCB_RQ_START 30
+#define S_TCB_RQ_START 3
+#define M_TCB_RQ_START 0x3ffffffULL
+#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
+
+#define W_TCB_RQ_MSN 30
+#define S_TCB_RQ_MSN 29
+#define M_TCB_RQ_MSN 0x3ffULL
+#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
+
+#define W_TCB_RQ_MAX_OFFSET 31
+#define S_TCB_RQ_MAX_OFFSET 7
+#define M_TCB_RQ_MAX_OFFSET 0xfULL
+#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
+
+#define W_TCB_RQ_WRITE_PTR 31
+#define S_TCB_RQ_WRITE_PTR 11
+#define M_TCB_RQ_WRITE_PTR 0x3ffULL
+#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
+
+#define W_TCB_INB_WRITE_PERM 31
+#define S_TCB_INB_WRITE_PERM 21
+#define M_TCB_INB_WRITE_PERM 0x1ULL
+#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
+
+#define W_TCB_INB_READ_PERM 31
+#define S_TCB_INB_READ_PERM 22
+#define M_TCB_INB_READ_PERM 0x1ULL
+#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
+
+#define W_TCB_ORD_L_BIT_VLD 31
+#define S_TCB_ORD_L_BIT_VLD 23
+#define M_TCB_ORD_L_BIT_VLD 0x1ULL
+#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
+
+#define W_TCB_RDMAP_OPCODE 31
+#define S_TCB_RDMAP_OPCODE 24
+#define M_TCB_RDMAP_OPCODE 0xfULL
+#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
+
+#define W_TCB_TX_FLUSH 31
+#define S_TCB_TX_FLUSH 28
+#define M_TCB_TX_FLUSH 0x1ULL
+#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
+
+#define W_TCB_TX_OOS_RXMT 31
+#define S_TCB_TX_OOS_RXMT 29
+#define M_TCB_TX_OOS_RXMT 0x1ULL
+#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
+
+#define W_TCB_TX_OOS_TXMT 31
+#define S_TCB_TX_OOS_TXMT 30
+#define M_TCB_TX_OOS_TXMT 0x1ULL
+#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
+
+#define W_TCB_SLUSH_AUX2 31
+#define S_TCB_SLUSH_AUX2 31
+#define M_TCB_SLUSH_AUX2 0x1ULL
+#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
+
+#define W_TCB_RX_FRAG1_PTR_RAW2 25
+#define S_TCB_RX_FRAG1_PTR_RAW2 30
+#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL
+#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
+
+#define W_TCB_RX_DDP_FLAGS 26
+#define S_TCB_RX_DDP_FLAGS 15
+#define M_TCB_RX_DDP_FLAGS 0x3ffULL
+#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
+
+#define W_TCB_SLUSH_AUX3 26
+#define S_TCB_SLUSH_AUX3 31
+#define M_TCB_SLUSH_AUX3 0x1ffULL
+#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
+
+#define W_TCB_RX_DDP_BUF0_OFFSET 27
+#define S_TCB_RX_DDP_BUF0_OFFSET 8
+#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL
+#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
+
+#define W_TCB_RX_DDP_BUF0_LEN 27
+#define S_TCB_RX_DDP_BUF0_LEN 30
+#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL
+#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
+
+#define W_TCB_RX_DDP_BUF1_OFFSET 28
+#define S_TCB_RX_DDP_BUF1_OFFSET 20
+#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL
+#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
+
+#define W_TCB_RX_DDP_BUF1_LEN 29
+#define S_TCB_RX_DDP_BUF1_LEN 10
+#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL
+#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
+
+#define W_TCB_RX_DDP_BUF0_TAG 30
+#define S_TCB_RX_DDP_BUF0_TAG 0
+#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL
+#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
+
+#define W_TCB_RX_DDP_BUF1_TAG 31
+#define S_TCB_RX_DDP_BUF1_TAG 0
+#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL
+#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
+
+#define S_TF_DACK 10
+#define V_TF_DACK(x) ((x) << S_TF_DACK)
+
+#define S_TF_NAGLE 11
+#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
+
+#define S_TF_RECV_SCALE 12
+#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
+
+#define S_TF_RECV_TSTMP 13
+#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
+
+#define S_TF_RECV_SACK 14
+#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
+
+#define S_TF_TURBO 15
+#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
+
+#define S_TF_KEEPALIVE 16
+#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
+
+#define S_TF_TCAM_BYPASS 17
+#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
+
+#define S_TF_CORE_FIN 18
+#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
+
+#define S_TF_CORE_MORE 19
+#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
+
+#define S_TF_MIGRATING 20
+#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
+
+#define S_TF_ACTIVE_OPEN 21
+#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
+
+#define S_TF_ASK_MODE 22
+#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
+
+#define S_TF_NON_OFFLOAD 23
+#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
+
+#define S_TF_MOD_SCHD 24
+#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
+
+#define S_TF_MOD_SCHD_REASON0 25
+#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
+
+#define S_TF_MOD_SCHD_REASON1 26
+#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
+
+#define S_TF_MOD_SCHD_RX 27
+#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
+
+#define S_TF_CORE_PUSH 28
+#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
+
+#define S_TF_RCV_COALESCE_ENABLE 29
+#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
+
+#define S_TF_RCV_COALESCE_PUSH 30
+#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
+
+#define S_TF_RCV_COALESCE_LAST_PSH 31
+#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
+
+#define S_TF_RCV_COALESCE_HEARTBEAT 32
+#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
+
+#define S_TF_HALF_CLOSE 33
+#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
+
+#define S_TF_DACK_MSS 34
+#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
+
+#define S_TF_CCTRL_SEL0 35
+#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
+
+#define S_TF_CCTRL_SEL1 36
+#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
+
+#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37
+#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
+
+#define S_TF_TX_PACE_AUTO 38
+#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
+
+#define S_TF_PEER_FIN_HELD 39
+#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
+
+#define S_TF_CORE_URG 40
+#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
+
+#define S_TF_RDMA_ERROR 41
+#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
+
+#define S_TF_SSWS_DISABLED 42
+#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
+
+#define S_TF_DUPACK_COUNT_ODD 43
+#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
+
+#define S_TF_TX_CHANNEL 44
+#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
+
+#define S_TF_RX_CHANNEL 45
+#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
+
+#define S_TF_TX_PACE_FIXED 46
+#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
+
+#define S_TF_RDMA_FLM_ERROR 47
+#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
+
+#define S_TF_RX_FLOW_CONTROL_DISABLE 48
+#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
+
+#endif /* _TCB_DEFS_H */
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 727b10d89686..1a854598e0e6 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
To compile the driver as a module, choose M here. The module
will be called ib_ehca.
-config INFINIBAND_EHCA_SCALING
- bool "Scaling support (EXPERIMENTAL)"
- depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
- default y
- ---help---
- eHCA scaling support schedules the CQ callbacks to different CPUs.
-
- To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1c722032319c..40404c9e2817 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -42,8 +42,6 @@
#ifndef __EHCA_CLASSES_H__
#define __EHCA_CLASSES_H__
-#include "ehca_classes.h"
-#include "ipz_pt_fn.h"
struct ehca_module;
struct ehca_qp;
@@ -54,14 +52,22 @@ struct ehca_mw;
struct ehca_pd;
struct ehca_av;
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
#ifdef CONFIG_PPC64
#include "ehca_classes_pSeries.h"
#endif
+#include "ipz_pt_fn.h"
+#include "ehca_qes.h"
+#include "ehca_irq.h"
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
+#define EHCA_EQE_CACHE_SIZE 20
-#include "ehca_irq.h"
+struct ehca_eqe_cache_entry {
+ struct ehca_eqe *eqe;
+ struct ehca_cq *cq;
+};
struct ehca_eq {
u32 length;
@@ -74,6 +80,8 @@ struct ehca_eq {
spinlock_t spinlock;
struct tasklet_struct interrupt_task;
u32 ist;
+ spinlock_t irq_spinlock;
+ struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
};
struct ehca_sport {
@@ -119,13 +127,14 @@ struct ehca_qp {
struct ipz_qp_handle ipz_qp_handle;
struct ehca_pfqp pf;
struct ib_qp_init_attr init_attr;
- u64 uspace_squeue;
- u64 uspace_rqueue;
- u64 uspace_fwh;
struct ehca_cq *send_cq;
struct ehca_cq *recv_cq;
unsigned int sqerr_purgeflag;
struct hlist_node list_entries;
+ /* mmap counter for resources mapped into user space */
+ u32 mm_count_squeue;
+ u32 mm_count_rqueue;
+ u32 mm_count_galpa;
};
/* must be power of 2 */
@@ -142,13 +151,14 @@ struct ehca_cq {
struct ipz_cq_handle ipz_cq_handle;
struct ehca_pfcq pf;
spinlock_t cb_lock;
- u64 uspace_queue;
- u64 uspace_fwh;
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
u32 nr_callbacks;
spinlock_t task_lock;
u32 ownpid;
+ /* mmap counter for resources mapped into user space */
+ u32 mm_count_queue;
+ u32 mm_count_galpa;
};
enum ehca_mr_flag {
@@ -248,20 +258,6 @@ struct ehca_ucontext {
struct ib_ucontext ib_ucontext;
};
-struct ehca_module *ehca_module_new(void);
-
-int ehca_module_delete(struct ehca_module *me);
-
-int ehca_eq_ctor(struct ehca_eq *eq);
-
-int ehca_eq_dtor(struct ehca_eq *eq);
-
-struct ehca_shca *ehca_shca_new(void);
-
-int ehca_shca_delete(struct ehca_shca *me);
-
-struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
-
int ehca_init_pd_cache(void);
void ehca_cleanup_pd_cache(void);
int ehca_init_cq_cache(void);
@@ -281,9 +277,9 @@ extern struct idr ehca_cq_idr;
extern int ehca_static_rate;
extern int ehca_port_act_time;
extern int ehca_use_hp_mr;
+extern int ehca_scaling_code;
struct ipzu_queue_resp {
- u64 queue; /* points to first queue entry */
u32 qe_size; /* queue entry size */
u32 act_nr_of_sg;
u32 queue_length; /* queue length allocated in bytes */
@@ -296,7 +292,6 @@ struct ehca_create_cq_resp {
u32 cq_number;
u32 token;
struct ipzu_queue_resp ipz_queue;
- struct h_galpas galpas;
};
struct ehca_create_qp_resp {
@@ -309,7 +304,6 @@ struct ehca_create_qp_resp {
u32 dummy; /* padding for 8 byte alignment */
struct ipzu_queue_resp ipz_squeue;
struct ipzu_queue_resp ipz_rqueue;
- struct h_galpas galpas;
};
struct ehca_alloc_cq_parms {
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 6074c897f51c..6ebfa27e4e16 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -134,14 +134,13 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
return ERR_PTR(-EINVAL);
- my_cq = kmem_cache_alloc(cq_cache, GFP_KERNEL);
+ my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
if (!my_cq) {
ehca_err(device, "Out of memory for ehca_cq struct device=%p",
device);
return ERR_PTR(-ENOMEM);
}
- memset(my_cq, 0, sizeof(struct ehca_cq));
memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
spin_lock_init(&my_cq->spinlock);
@@ -267,7 +266,6 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
- struct vm_area_struct *vma;
memset(&resp, 0, sizeof(resp));
resp.cq_number = my_cq->cq_number;
resp.token = my_cq->token;
@@ -276,40 +274,14 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
- ipz_queue->queue_length,
- (void**)&resp.ipz_queue.queue,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap queue pages");
- cq = ERR_PTR(ret);
- goto create_cq_exit4;
- }
- my_cq->uspace_queue = resp.ipz_queue.queue;
- resp.galpas = my_cq->galpas;
- ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
- (void**)&resp.galpas.kernel.fw_handle,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap fw_handle");
- cq = ERR_PTR(ret);
- goto create_cq_exit5;
- }
- my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
- goto create_cq_exit6;
+ goto create_cq_exit4;
}
}
return cq;
-create_cq_exit6:
- ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
-
-create_cq_exit5:
- ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
-
create_cq_exit4:
ipz_queue_dtor(&my_cq->ipz_queue);
@@ -333,7 +305,6 @@ create_cq_exit1:
int ehca_destroy_cq(struct ib_cq *cq)
{
u64 h_ret;
- int ret;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int cq_num = my_cq->cq_number;
struct ib_device *device = cq->device;
@@ -343,6 +314,20 @@ int ehca_destroy_cq(struct ib_cq *cq)
u32 cur_pid = current->tgid;
unsigned long flags;
+ if (cq->uobject) {
+ if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
+ ehca_err(device, "Resources still referenced in "
+ "user space cq_num=%x", my_cq->cq_number);
+ return -EINVAL;
+ }
+ if (my_cq->ownpid != cur_pid) {
+ ehca_err(device, "Invalid caller pid=%x ownpid=%x "
+ "cq_num=%x",
+ cur_pid, my_cq->ownpid, my_cq->cq_number);
+ return -EINVAL;
+ }
+ }
+
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
while (my_cq->nr_callbacks) {
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
@@ -353,25 +338,6 @@ int ehca_destroy_cq(struct ib_cq *cq)
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
- ehca_err(device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_cq->ownpid);
- return -EINVAL;
- }
-
- /* un-mmap if vma alloc */
- if (my_cq->uspace_queue ) {
- ret = ehca_munmap(my_cq->uspace_queue,
- my_cq->ipz_queue.queue_length);
- if (ret)
- ehca_err(device, "Could not munmap queue ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
- if (ret)
- ehca_err(device, "Could not munmap fwh ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- }
-
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
if (h_ret == H_R_STATE) {
/* cq in err: read err data and destroy it forcibly */
@@ -400,7 +366,7 @@ int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
u32 cur_pid = current->tgid;
- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+ if (cq->uobject && my_cq->ownpid != cur_pid) {
ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
cur_pid, my_cq->ownpid);
return -EINVAL;
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 5281dec66f12..4961eb88827c 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shca,
struct ib_device *ib_dev = &shca->ib_device;
spin_lock_init(&eq->spinlock);
+ spin_lock_init(&eq->irq_spinlock);
eq->is_initialized = 0;
if (type != EHCA_EQ && type != EHCA_NEQ) {
@@ -122,7 +123,7 @@ int ehca_create_eq(struct ehca_shca *shca,
/* register interrupt handlers and initialize work queues */
if (type == EHCA_EQ) {
ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
- SA_INTERRUPT, "ehca_eq",
+ IRQF_DISABLED, "ehca_eq",
(void *)shca);
if (ret < 0)
ehca_err(ib_dev, "Can't map interrupt handler.");
@@ -130,7 +131,7 @@ int ehca_create_eq(struct ehca_shca *shca,
tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
} else if (type == EHCA_NEQ) {
ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
- SA_INTERRUPT, "ehca_neq",
+ IRQF_DISABLED, "ehca_neq",
(void *)shca);
if (ret < 0)
ehca_err(ib_dev, "Can't map interrupt handler.");
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index b7be950ab47c..30eb45df9f0b 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -162,6 +162,9 @@ int ehca_query_port(struct ib_device *ibdev,
props->active_width = IB_WIDTH_12X;
props->active_speed = 0x1;
+ /* at the moment (logical) link state is always LINK_UP */
+ props->phys_state = 0x5;
+
query_port1:
ehca_free_fw_ctrlblock(rblock);
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index c069be8cbcb2..3ec53c687d08 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -63,15 +63,11 @@
#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
static void queue_comp_task(struct ehca_cq *__cq);
static struct ehca_comp_pool* pool;
static struct notifier_block comp_pool_callback_nb;
-#endif
-
static inline void comp_event_callback(struct ehca_cq *cq)
{
if (!cq->ib_cq.comp_handler)
@@ -206,7 +202,7 @@ static void qp_event_callback(struct ehca_shca *shca,
}
static void cq_event_callback(struct ehca_shca *shca,
- u64 eqe)
+ u64 eqe)
{
struct ehca_cq *cq;
unsigned long flags;
@@ -318,7 +314,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
"disruptive port %x configuration change", port);
ehca_info(&shca->ib_device,
- "port %x is inactive.", port);
+ "port %x is inactive.", port);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
event.element.port_num = port;
@@ -326,7 +322,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
ib_dispatch_event(&event);
ehca_info(&shca->ib_device,
- "port %x is active.", port);
+ "port %x is active.", port);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ACTIVE;
event.element.port_num = port;
@@ -401,115 +397,170 @@ irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
return IRQ_HANDLED;
}
-void ehca_tasklet_eq(unsigned long data)
-{
- struct ehca_shca *shca = (struct ehca_shca*)data;
- struct ehca_eqe *eqe;
- int int_state;
- int query_cnt = 0;
- do {
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
+{
+ u64 eqe_value;
+ u32 token;
+ unsigned long flags;
+ struct ehca_cq *cq;
+ eqe_value = eqe->entry;
+ ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ ehca_dbg(&shca->ib_device, "... completion event");
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ cq = idr_find(&ehca_cq_idr, token);
+ if (cq == NULL) {
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq token=%x",
+ token);
+ return;
+ }
+ reset_eq_pending(cq);
+ if (ehca_scaling_code) {
+ queue_comp_task(cq);
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ } else {
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ comp_event_callback(cq);
+ }
+ } else {
+ ehca_dbg(&shca->ib_device,
+ "Got non completion event");
+ parse_identifier(shca, eqe_value);
+ }
+}
- if ((shca->hw_level >= 2) && eqe)
- int_state = 1;
- else
- int_state = 0;
-
- while ((int_state == 1) || eqe) {
- while (eqe) {
- u64 eqe_value = eqe->entry;
-
- ehca_dbg(&shca->ib_device,
- "eqe_value=%lx", eqe_value);
-
- /* TODO: better structure */
- if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
- eqe_value)) {
- unsigned long flags;
- u32 token;
- struct ehca_cq *cq;
-
- ehca_dbg(&shca->ib_device,
- "... completion event");
- token =
- EHCA_BMASK_GET(EQE_CQ_TOKEN,
- eqe_value);
- spin_lock_irqsave(&ehca_cq_idr_lock,
- flags);
- cq = idr_find(&ehca_cq_idr, token);
-
- if (cq == NULL) {
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
- break;
- }
-
- reset_eq_pending(cq);
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
- queue_comp_task(cq);
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
-#else
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
- comp_event_callback(cq);
-#endif
- } else {
- ehca_dbg(&shca->ib_device,
- "... non completion event");
- parse_identifier(shca, eqe_value);
- }
- eqe =
- (struct ehca_eqe *)ehca_poll_eq(shca,
- &shca->eq);
- }
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+ struct ehca_eq *eq = &shca->eq;
+ struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+ u64 eqe_value;
+ unsigned long flags;
+ int eqe_cnt, i;
+ int eq_empty = 0;
+
+ spin_lock_irqsave(&eq->irq_spinlock, flags);
+ if (is_irq) {
+ const int max_query_cnt = 100;
+ int query_cnt = 0;
+ int int_state = 1;
+ do {
+ int_state = hipz_h_query_int_state(
+ shca->ipz_hca_handle, eq->ist);
+ query_cnt++;
+ iosync();
+ } while (int_state && query_cnt < max_query_cnt);
+ if (unlikely((query_cnt == max_query_cnt)))
+ ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
+ int_state, query_cnt);
+ }
- if (shca->hw_level >= 2) {
- int_state =
- hipz_h_query_int_state(shca->ipz_hca_handle,
- shca->eq.ist);
- query_cnt++;
- iosync();
- if (query_cnt >= 100) {
- query_cnt = 0;
- int_state = 0;
- }
+ /* read out all eqes */
+ eqe_cnt = 0;
+ do {
+ u32 token;
+ eqe_cache[eqe_cnt].eqe =
+ (struct ehca_eqe *)ehca_poll_eq(shca, eq);
+ if (!eqe_cache[eqe_cnt].eqe)
+ break;
+ eqe_value = eqe_cache[eqe_cnt].eqe->entry;
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ spin_lock(&ehca_cq_idr_lock);
+ eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+ if (!eqe_cache[eqe_cnt].cq) {
+ spin_unlock(&ehca_cq_idr_lock);
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq "
+ "token=%x", token);
+ continue;
}
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
-
+ spin_unlock(&ehca_cq_idr_lock);
+ } else
+ eqe_cache[eqe_cnt].cq = NULL;
+ eqe_cnt++;
+ } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
+ if (!eqe_cnt) {
+ if (is_irq)
+ ehca_dbg(&shca->ib_device,
+ "No eqe found for irq event");
+ goto unlock_irq_spinlock;
+ } else if (!is_irq)
+ ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+ if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
+ ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
+ /* enable irq for new packets */
+ for (i = 0; i < eqe_cnt; i++) {
+ if (eq->eqe_cache[i].cq)
+ reset_eq_pending(eq->eqe_cache[i].cq);
+ }
+ /* check eq */
+ spin_lock(&eq->spinlock);
+ eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
+ spin_unlock(&eq->spinlock);
+ /* call completion handler for cached eqes */
+ for (i = 0; i < eqe_cnt; i++)
+ if (eq->eqe_cache[i].cq) {
+ if (ehca_scaling_code) {
+ spin_lock(&ehca_cq_idr_lock);
+ queue_comp_task(eq->eqe_cache[i].cq);
+ spin_unlock(&ehca_cq_idr_lock);
+ } else
+ comp_event_callback(eq->eqe_cache[i].cq);
+ } else {
+ ehca_dbg(&shca->ib_device, "Got non completion event");
+ parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
}
- } while (int_state != 0);
-
- return;
+ /* poll eq if not empty */
+ if (eq_empty)
+ goto unlock_irq_spinlock;
+ do {
+ struct ehca_eqe *eqe;
+ eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+ if (!eqe)
+ break;
+ process_eqe(shca, eqe);
+ eqe_cnt++;
+ } while (1);
+
+unlock_irq_spinlock:
+ spin_unlock_irqrestore(&eq->irq_spinlock, flags);
}
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+void ehca_tasklet_eq(unsigned long data)
+{
+ ehca_process_eq((struct ehca_shca*)data, 1);
+}
static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
{
- unsigned long flags_last_cpu;
+ int cpu;
+ unsigned long flags;
+ WARN_ON_ONCE(!in_interrupt());
if (ehca_debug_level)
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
- spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
- pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
- if (pool->last_cpu == NR_CPUS)
- pool->last_cpu = first_cpu(cpu_online_map);
- spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+ spin_lock_irqsave(&pool->last_cpu_lock, flags);
+ cpu = next_cpu(pool->last_cpu, cpu_online_map);
+ if (cpu == NR_CPUS)
+ cpu = first_cpu(cpu_online_map);
+ pool->last_cpu = cpu;
+ spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
- return pool->last_cpu;
+ return cpu;
}
static void __queue_comp_task(struct ehca_cq *__cq,
struct ehca_cpu_comp_task *cct)
{
- unsigned long flags_cct;
- unsigned long flags_cq;
+ unsigned long flags;
- spin_lock_irqsave(&cct->task_lock, flags_cct);
- spin_lock_irqsave(&__cq->task_lock, flags_cq);
+ spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock(&__cq->task_lock);
if (__cq->nr_callbacks == 0) {
__cq->nr_callbacks++;
@@ -520,8 +571,8 @@ static void __queue_comp_task(struct ehca_cq *__cq,
else
__cq->nr_callbacks++;
- spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock(&__cq->task_lock);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
}
static void queue_comp_task(struct ehca_cq *__cq)
@@ -532,69 +583,69 @@ static void queue_comp_task(struct ehca_cq *__cq)
cpu = get_cpu();
cpu_id = find_next_online_cpu(pool);
-
BUG_ON(!cpu_online(cpu_id));
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
if (cct->cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
}
__queue_comp_task(__cq, cct);
-
- put_cpu();
-
- return;
}
static void run_comp_task(struct ehca_cpu_comp_task* cct)
{
struct ehca_cq *cq;
- unsigned long flags_cct;
- unsigned long flags_cq;
+ unsigned long flags;
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ spin_lock_irqsave(&cct->task_lock, flags);
while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
comp_event_callback(cq);
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ spin_lock_irqsave(&cct->task_lock, flags);
- spin_lock_irqsave(&cq->task_lock, flags_cq);
+ spin_lock(&cq->task_lock);
cq->nr_callbacks--;
if (cq->nr_callbacks == 0) {
list_del_init(cct->cq_list.next);
cct->cq_jobs--;
}
- spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+ spin_unlock(&cq->task_lock);
}
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
- return;
+ spin_unlock_irqrestore(&cct->task_lock, flags);
}
static int comp_task(void *__cct)
{
struct ehca_cpu_comp_task* cct = __cct;
+ int cql_empty;
DECLARE_WAITQUEUE(wait, current);
set_current_state(TASK_INTERRUPTIBLE);
while(!kthread_should_stop()) {
add_wait_queue(&cct->wait_queue, &wait);
- if (list_empty(&cct->cq_list))
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (cql_empty)
schedule();
else
__set_current_state(TASK_RUNNING);
remove_wait_queue(&cct->wait_queue, &wait);
- if (!list_empty(&cct->cq_list))
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (!cql_empty)
run_comp_task(__cct);
set_current_state(TASK_INTERRUPTIBLE);
@@ -637,8 +688,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
if (task)
kthread_stop(task);
-
- return;
}
static void take_over_work(struct ehca_comp_pool *pool,
@@ -654,11 +703,11 @@ static void take_over_work(struct ehca_comp_pool *pool,
list_splice_init(&cct->cq_list, &list);
while(!list_empty(&list)) {
- cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+ cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
- list_del(&cq->entry);
- __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
- smp_processor_id()));
+ list_del(&cq->entry);
+ __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+ smp_processor_id()));
}
spin_unlock_irqrestore(&cct->task_lock, flags_cct);
@@ -708,14 +757,14 @@ static int comp_pool_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-#endif
-
int ehca_create_comp_pool(void)
{
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
int cpu;
struct task_struct *task;
+ if (!ehca_scaling_code)
+ return 0;
+
pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
if (pool == NULL)
return -ENOMEM;
@@ -740,23 +789,25 @@ int ehca_create_comp_pool(void)
comp_pool_callback_nb.notifier_call = comp_pool_callback;
comp_pool_callback_nb.priority =0;
register_cpu_notifier(&comp_pool_callback_nb);
-#endif
+
+ printk(KERN_INFO "eHCA scaling code enabled\n");
return 0;
}
void ehca_destroy_comp_pool(void)
{
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
int i;
+ if (!ehca_scaling_code)
+ return;
+
unregister_cpu_notifier(&comp_pool_callback_nb);
for (i = 0; i < NR_CPUS; i++) {
if (cpu_online(i))
destroy_comp_task(pool, i);
}
-#endif
-
- return;
+ free_percpu(pool->cpu_comp_tasks);
+ kfree(pool);
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index be579cc0adf6..6ed06ee033ed 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -56,6 +56,7 @@ void ehca_tasklet_neq(unsigned long data);
irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
void ehca_tasklet_eq(unsigned long data);
+void ehca_process_eq(struct ehca_shca *shca, int is_irq);
struct ehca_cpu_comp_task {
wait_queue_head_t wait_queue;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index cd7789f0d08e..95fd59fb4528 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -171,14 +171,6 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void ehca_poll_eqs(unsigned long data);
-int ehca_mmap_nopage(u64 foffset,u64 length,void **mapped,
- struct vm_area_struct **vma);
-
-int ehca_mmap_register(u64 physical,void **mapped,
- struct vm_area_struct **vma);
-
-int ehca_munmap(unsigned long addr, size_t len);
-
#ifdef CONFIG_PPC_64K_PAGES
void *ehca_alloc_fw_ctrlblock(gfp_t flags);
void ehca_free_fw_ctrlblock(void *ptr);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 6574fbbaead5..c1835121a822 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0019");
+MODULE_VERSION("SVNEHCA_0021");
int ehca_open_aqp1 = 0;
int ehca_debug_level = 0;
@@ -62,6 +62,7 @@ int ehca_use_hp_mr = 0;
int ehca_port_act_time = 30;
int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
+int ehca_scaling_code = 1;
module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -71,6 +72,7 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
module_param_named(port_act_time, ehca_port_act_time, int, 0);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
module_param_named(static_rate, ehca_static_rate, int, 0);
+module_param_named(scaling_code, ehca_scaling_code, int, 0);
MODULE_PARM_DESC(open_aqp1,
"AQP1 on startup (0: no (default), 1: yes)");
@@ -91,6 +93,8 @@ MODULE_PARM_DESC(poll_all_eqs,
" (0: no, 1: yes (default))");
MODULE_PARM_DESC(static_rate,
"set permanent static rate (default: disabled)");
+MODULE_PARM_DESC(scaling_code,
+ "set scaling code (0: disabled, 1: enabled/default)");
spinlock_t ehca_qp_idr_lock;
spinlock_t ehca_cq_idr_lock;
@@ -288,7 +292,7 @@ int ehca_init_device(struct ehca_shca *shca)
strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
shca->ib_device.owner = THIS_MODULE;
- shca->ib_device.uverbs_abi_ver = 5;
+ shca->ib_device.uverbs_abi_ver = 6;
shca->ib_device.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -432,8 +436,8 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n",
- ehca_debug_level);
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ ehca_debug_level);
}
static ssize_t ehca_store_debug_level(struct device_driver *ddp,
@@ -778,8 +782,24 @@ void ehca_poll_eqs(unsigned long data)
spin_lock(&shca_list_lock);
list_for_each_entry(shca, &shca_list, shca_list) {
- if (shca->eq.is_initialized)
- ehca_tasklet_eq((unsigned long)(void*)shca);
+ if (shca->eq.is_initialized) {
+ /* call deadman proc only if eq ptr does not change */
+ struct ehca_eq *eq = &shca->eq;
+ int max = 3;
+ volatile u64 q_ofs, q_ofs2;
+ u64 flags;
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ do {
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs2 = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ max--;
+ } while (q_ofs == q_ofs2 && max > 0);
+ if (q_ofs == q_ofs2)
+ ehca_process_eq(shca, 0);
+ }
}
mod_timer(&poll_eqs_timer, jiffies + HZ);
spin_unlock(&shca_list_lock);
@@ -790,7 +810,7 @@ int __init ehca_module_init(void)
int ret;
printk(KERN_INFO "eHCA Infiniband Device Driver "
- "(Rel.: SVNEHCA_0019)\n");
+ "(Rel.: SVNEHCA_0021)\n");
idr_init(&ehca_qp_idr);
idr_init(&ehca_cq_idr);
spin_lock_init(&ehca_qp_idr_lock);
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index cfb362a1029c..d22ab563633f 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -53,9 +53,8 @@ static struct ehca_mr *ehca_mr_new(void)
{
struct ehca_mr *me;
- me = kmem_cache_alloc(mr_cache, GFP_KERNEL);
+ me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
if (me) {
- memset(me, 0, sizeof(struct ehca_mr));
spin_lock_init(&me->mrlock);
} else
ehca_gen_err("alloc failed");
@@ -72,9 +71,8 @@ static struct ehca_mw *ehca_mw_new(void)
{
struct ehca_mw *me;
- me = kmem_cache_alloc(mw_cache, GFP_KERNEL);
+ me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
if (me) {
- memset(me, 0, sizeof(struct ehca_mw));
spin_lock_init(&me->mwlock);
} else
ehca_gen_err("alloc failed");
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index d5345e5b3cd6..79d0591a8043 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -50,14 +50,13 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
{
struct ehca_pd *pd;
- pd = kmem_cache_alloc(pd_cache, GFP_KERNEL);
+ pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
if (!pd) {
ehca_err(device, "device=%p context=%p out of memory",
device, context);
return ERR_PTR(-ENOMEM);
}
- memset(pd, 0, sizeof(struct ehca_pd));
pd->ownpid = current->tgid;
/*
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 34b85556d01e..df0516f24379 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -450,13 +450,12 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
if (pd->uobject && udata)
context = pd->uobject->context;
- my_qp = kmem_cache_alloc(qp_cache, GFP_KERNEL);
+ my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
if (!my_qp) {
ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
return ERR_PTR(-ENOMEM);
}
- memset(my_qp, 0, sizeof(struct ehca_qp));
memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
spin_lock_init(&my_qp->spinlock_s);
spin_lock_init(&my_qp->spinlock_r);
@@ -637,7 +636,6 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
struct ehca_create_qp_resp resp;
- struct vm_area_struct * vma;
memset(&resp, 0, sizeof(resp));
resp.qp_num = my_qp->real_qp_num;
@@ -651,59 +649,21 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
- ipz_rqueue->queue_length,
- (void**)&resp.ipz_rqueue.queue,
- &vma);
- if (ret) {
- ehca_err(pd->device, "Could not mmap rqueue pages");
- goto create_qp_exit3;
- }
- my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
/* squeue properties */
resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
- ipz_squeue->queue_length,
- (void**)&resp.ipz_squeue.queue,
- &vma);
- if (ret) {
- ehca_err(pd->device, "Could not mmap squeue pages");
- goto create_qp_exit4;
- }
- my_qp->uspace_squeue = resp.ipz_squeue.queue;
- /* fw_handle */
- resp.galpas = my_qp->galpas;
- ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
- (void**)&resp.galpas.kernel.fw_handle,
- &vma);
- if (ret) {
- ehca_err(pd->device, "Could not mmap fw_handle");
- goto create_qp_exit5;
- }
- my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
-
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
ret = -EINVAL;
- goto create_qp_exit6;
+ goto create_qp_exit3;
}
}
return &my_qp->ib_qp;
-create_qp_exit6:
- ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
-
-create_qp_exit5:
- ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
-
-create_qp_exit4:
- ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
-
create_qp_exit3:
ipz_queue_dtor(&my_qp->ipz_rqueue);
ipz_queue_dtor(&my_qp->ipz_squeue);
@@ -931,7 +891,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
my_qp->qp_type == IB_QPT_SMI) &&
statetrans == IB_QPST_SQE2RTS) {
/* mark next free wqe if kernel */
- if (my_qp->uspace_squeue == 0) {
+ if (!ibqp->uobject) {
struct ehca_wqe *wqe;
/* lock send queue */
spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
@@ -1417,11 +1377,18 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
enum ib_qp_type qp_type;
unsigned long flags;
- if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
- my_pd->ownpid != cur_pid) {
- ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_pd->ownpid);
- return -EINVAL;
+ if (ibqp->uobject) {
+ if (my_qp->mm_count_galpa ||
+ my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
+ ehca_err(ibqp->device, "Resources still referenced in "
+ "user space qp_num=%x", ibqp->qp_num);
+ return -EINVAL;
+ }
+ if (my_pd->ownpid != cur_pid) {
+ ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+ cur_pid, my_pd->ownpid);
+ return -EINVAL;
+ }
}
if (my_qp->send_cq) {
@@ -1439,24 +1406,6 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
idr_remove(&ehca_qp_idr, my_qp->token);
spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
- /* un-mmap if vma alloc */
- if (my_qp->uspace_rqueue) {
- ret = ehca_munmap(my_qp->uspace_rqueue,
- my_qp->ipz_rqueue.queue_length);
- if (ret)
- ehca_err(ibqp->device, "Could not munmap rqueue "
- "qp_num=%x", qp_num);
- ret = ehca_munmap(my_qp->uspace_squeue,
- my_qp->ipz_squeue.queue_length);
- if (ret)
- ehca_err(ibqp->device, "Could not munmap squeue "
- "qp_num=%x", qp_num);
- ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
- if (ret)
- ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
- qp_num);
- }
-
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index b46bda1bf85d..08d3f892d9f3 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -579,7 +579,7 @@ poll_cq_one_read_cqe:
} else
wc->status = IB_WC_SUCCESS;
- wc->qp_num = cqe->local_qp_number;
+ wc->qp = NULL;
wc->byte_len = cqe->nr_bytes_transferred;
wc->pkey_index = cqe->pkey_index;
wc->slid = cqe->rlid;
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index e08764e4aef2..73db920b6945 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -68,105 +68,183 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context)
return 0;
}
-struct page *ehca_nopage(struct vm_area_struct *vma,
- unsigned long address, int *type)
+static void ehca_mm_open(struct vm_area_struct *vma)
{
- struct page *mypage = NULL;
- u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
- u32 idr_handle = fileoffset >> 32;
- u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
- u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
- u32 cur_pid = current->tgid;
- unsigned long flags;
- struct ehca_cq *cq;
- struct ehca_qp *qp;
- struct ehca_pd *pd;
- u64 offset;
- void *vaddr;
+ u32 *count = (u32*)vma->vm_private_data;
+ if (!count) {
+ ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ return;
+ }
+ (*count)++;
+ if (!(*count))
+ ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+ vma->vm_start, vma->vm_end, *count);
+}
- switch (q_type) {
- case 1: /* CQ */
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- cq = idr_find(&ehca_cq_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+static void ehca_mm_close(struct vm_area_struct *vma)
+{
+ u32 *count = (u32*)vma->vm_private_data;
+ if (!count) {
+ ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ return;
+ }
+ (*count)--;
+ ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+ vma->vm_start, vma->vm_end, *count);
+}
- /* make sure this mmap really belongs to the authorized user */
- if (!cq) {
- ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
- return NOPAGE_SIGBUS;
+static struct vm_operations_struct vm_ops = {
+ .open = ehca_mm_open,
+ .close = ehca_mm_close,
+};
+
+static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
+ u32 *mm_count)
+{
+ int ret;
+ u64 vsize, physical;
+
+ vsize = vma->vm_end - vma->vm_start;
+ if (vsize != EHCA_PAGESIZE) {
+ ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
+ return -EINVAL;
+ }
+
+ physical = galpas->user.fw_handle;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
+ /* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+ ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT,
+ vsize, vma->vm_page_prot);
+ if (unlikely(ret)) {
+ ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+ return -ENOMEM;
+ }
+
+ vma->vm_private_data = mm_count;
+ (*mm_count)++;
+ vma->vm_ops = &vm_ops;
+
+ return 0;
+}
+
+static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
+ u32 *mm_count)
+{
+ int ret;
+ u64 start, ofs;
+ struct page *page;
+
+ vma->vm_flags |= VM_RESERVED;
+ start = vma->vm_start;
+ for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
+ u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
+ page = virt_to_page(virt_addr);
+ ret = vm_insert_page(vma, start, page);
+ if (unlikely(ret)) {
+ ehca_gen_err("vm_insert_page() failed rc=%x", ret);
+ return ret;
}
+ start += PAGE_SIZE;
+ }
+ vma->vm_private_data = mm_count;
+ (*mm_count)++;
+ vma->vm_ops = &vm_ops;
- if (cq->ownpid != cur_pid) {
+ return 0;
+}
+
+static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
+ u32 rsrc_type)
+{
+ int ret;
+
+ switch (rsrc_type) {
+ case 1: /* galpa fw handle */
+ ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
+ ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
+ if (unlikely(ret)) {
ehca_err(cq->ib_cq.device,
- "Invalid caller pid=%x ownpid=%x",
- cur_pid, cq->ownpid);
- return NOPAGE_SIGBUS;
+ "ehca_mmap_fw() failed rc=%x cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
}
+ break;
- if (rsrc_type == 2) {
- ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
- offset = address - vma->vm_start;
- vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
- ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
- offset, vaddr);
- mypage = virt_to_page(vaddr);
+ case 2: /* cq queue_addr */
+ ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
+ ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
+ if (unlikely(ret)) {
+ ehca_err(cq->ib_cq.device,
+ "ehca_mmap_queue() failed rc=%x cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
}
break;
- case 2: /* QP */
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
- qp = idr_find(&ehca_qp_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ default:
+ ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
+ rsrc_type, cq->cq_number);
+ return -EINVAL;
+ }
- /* make sure this mmap really belongs to the authorized user */
- if (!qp) {
- ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
- return NOPAGE_SIGBUS;
+ return 0;
+}
+
+static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
+ u32 rsrc_type)
+{
+ int ret;
+
+ switch (rsrc_type) {
+ case 1: /* galpa fw handle */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
+ ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "remap_pfn_range() failed ret=%x qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return -ENOMEM;
}
+ break;
- pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
- if (pd->ownpid != cur_pid) {
+ case 2: /* qp rqueue_addr */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
+ qp->ib_qp.qp_num);
+ ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue);
+ if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
- "Invalid caller pid=%x ownpid=%x",
- cur_pid, pd->ownpid);
- return NOPAGE_SIGBUS;
+ "ehca_mmap_queue(rq) failed rc=%x qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
}
+ break;
- if (rsrc_type == 2) { /* rqueue */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
- offset = address - vma->vm_start;
- vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
- ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
- offset, vaddr);
- mypage = virt_to_page(vaddr);
- } else if (rsrc_type == 3) { /* squeue */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
- offset = address - vma->vm_start;
- vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
- ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
- offset, vaddr);
- mypage = virt_to_page(vaddr);
+ case 3: /* qp squeue_addr */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
+ qp->ib_qp.qp_num);
+ ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "ehca_mmap_queue(sq) failed rc=%x qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
}
break;
default:
- ehca_gen_err("bad queue type %x", q_type);
- return NOPAGE_SIGBUS;
- }
-
- if (!mypage) {
- ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
- return NOPAGE_SIGBUS;
+ ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x",
+ rsrc_type, qp->ib_qp.qp_num);
+ return -EINVAL;
}
- get_page(mypage);
- return mypage;
+ return 0;
}
-static struct vm_operations_struct ehcau_vm_ops = {
- .nopage = ehca_nopage,
-};
-
int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
@@ -175,7 +253,6 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
u32 cur_pid = current->tgid;
u32 ret;
- u64 vsize, physical;
unsigned long flags;
struct ehca_cq *cq;
struct ehca_qp *qp;
@@ -201,44 +278,12 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
return -EINVAL;
- switch (rsrc_type) {
- case 1: /* galpa fw handle */
- ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
- vma->vm_flags |= VM_RESERVED;
- vsize = vma->vm_end - vma->vm_start;
- if (vsize != EHCA_PAGESIZE) {
- ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
- vma->vm_end - vma->vm_start);
- return -EINVAL;
- }
-
- physical = cq->galpas.user.fw_handle;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_IO | VM_RESERVED;
-
- ehca_dbg(cq->ib_cq.device,
- "vsize=%lx physical=%lx", vsize, physical);
- ret = remap_pfn_range(vma, vma->vm_start,
- physical >> PAGE_SHIFT, vsize,
- vma->vm_page_prot);
- if (ret) {
- ehca_err(cq->ib_cq.device,
- "remap_pfn_range() failed ret=%x",
- ret);
- return -ENOMEM;
- }
- break;
-
- case 2: /* cq queue_addr */
- ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &ehcau_vm_ops;
- break;
-
- default:
- ehca_err(cq->ib_cq.device, "bad resource type %x",
- rsrc_type);
- return -EINVAL;
+ ret = ehca_mmap_cq(vma, cq, rsrc_type);
+ if (unlikely(ret)) {
+ ehca_err(cq->ib_cq.device,
+ "ehca_mmap_cq() failed rc=%x cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
}
break;
@@ -262,50 +307,12 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
return -EINVAL;
- switch (rsrc_type) {
- case 1: /* galpa fw handle */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
- vma->vm_flags |= VM_RESERVED;
- vsize = vma->vm_end - vma->vm_start;
- if (vsize != EHCA_PAGESIZE) {
- ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
- vma->vm_end - vma->vm_start);
- return -EINVAL;
- }
-
- physical = qp->galpas.user.fw_handle;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_IO | VM_RESERVED;
-
- ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
- vsize, physical);
- ret = remap_pfn_range(vma, vma->vm_start,
- physical >> PAGE_SHIFT, vsize,
- vma->vm_page_prot);
- if (ret) {
- ehca_err(qp->ib_qp.device,
- "remap_pfn_range() failed ret=%x",
- ret);
- return -ENOMEM;
- }
- break;
-
- case 2: /* qp rqueue_addr */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &ehcau_vm_ops;
- break;
-
- case 3: /* qp squeue_addr */
- ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
- vma->vm_flags |= VM_RESERVED;
- vma->vm_ops = &ehcau_vm_ops;
- break;
-
- default:
- ehca_err(qp->ib_qp.device, "bad resource type %x",
- rsrc_type);
- return -EINVAL;
+ ret = ehca_mmap_qp(vma, qp, rsrc_type);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "ehca_mmap_qp() failed rc=%x qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
}
break;
@@ -316,77 +323,3 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return 0;
}
-
-int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
- struct vm_area_struct **vma)
-{
- down_write(&current->mm->mmap_sem);
- *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS,
- foffset);
- up_write(&current->mm->mmap_sem);
- if (!(*mapped)) {
- ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
- foffset, length);
- return -EINVAL;
- }
-
- *vma = find_vma(current->mm, (u64)*mapped);
- if (!(*vma)) {
- down_write(&current->mm->mmap_sem);
- do_munmap(current->mm, 0, length);
- up_write(&current->mm->mmap_sem);
- ehca_gen_err("couldn't find vma queue=%p", *mapped);
- return -EINVAL;
- }
- (*vma)->vm_flags |= VM_RESERVED;
- (*vma)->vm_ops = &ehcau_vm_ops;
-
- return 0;
-}
-
-int ehca_mmap_register(u64 physical, void **mapped,
- struct vm_area_struct **vma)
-{
- int ret;
- unsigned long vsize;
- /* ehca hw supports only 4k page */
- ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
- if (ret) {
- ehca_gen_err("could'nt mmap physical=%lx", physical);
- return ret;
- }
-
- (*vma)->vm_flags |= VM_RESERVED;
- vsize = (*vma)->vm_end - (*vma)->vm_start;
- if (vsize != EHCA_PAGESIZE) {
- ehca_gen_err("invalid vsize=%lx",
- (*vma)->vm_end - (*vma)->vm_start);
- return -EINVAL;
- }
-
- (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
- (*vma)->vm_flags |= VM_IO | VM_RESERVED;
-
- ret = remap_pfn_range((*vma), (*vma)->vm_start,
- physical >> PAGE_SHIFT, vsize,
- (*vma)->vm_page_prot);
- if (ret) {
- ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
- return -ENOMEM;
- }
-
- return 0;
-
-}
-
-int ehca_munmap(unsigned long addr, size_t len) {
- int ret = 0;
- struct mm_struct *mm = current->mm;
- if (mm) {
- down_write(&mm->mmap_sem);
- ret = do_munmap(mm, addr, len);
- up_write(&mm->mmap_sem);
- }
- return ret;
-}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index dc3bda2634b7..8199c45768a3 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -79,7 +79,7 @@ static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
if (q_offset >= queue->queue_length)
return NULL;
current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
- return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+ return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
}
/*
@@ -247,6 +247,15 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
return ret;
}
+static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
+{
+ void *ret = ipz_qeit_get(queue);
+ u32 qe = *(u8 *) ret;
+ if ((qe >> 7) != (queue->toggle_state & 1))
+ return NULL;
+ return ret;
+}
+
/* returns address (GX) of first queue entry */
static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
{
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 28c087b824c2..0f13a2182cc7 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -59,7 +59,7 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data,
static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
size_t count, loff_t *off);
-static struct file_operations diag_file_ops = {
+static const struct file_operations diag_file_ops = {
.owner = THIS_MODULE,
.write = ipath_diag_write,
.read = ipath_diag_read,
@@ -71,7 +71,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
const char __user *data,
size_t count, loff_t *off);
-static struct file_operations diagpkt_file_ops = {
+static const struct file_operations diagpkt_file_ops = {
.owner = THIS_MODULE,
.write = ipath_diagpkt_write,
};
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 6e0f2b8918ce..f6f949040825 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -96,8 +96,8 @@ static void ipath_dma_unmap_page(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
+static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
{
u64 addr;
int i;
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index b932bcb67a5e..5d64ff875297 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -54,7 +54,7 @@ static ssize_t ipath_write(struct file *, const char __user *, size_t,
static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
static int ipath_mmap(struct file *, struct vm_area_struct *);
-static struct file_operations ipath_file_ops = {
+static const struct file_operations ipath_file_ops = {
.owner = THIS_MODULE,
.write = ipath_write,
.open = ipath_open,
@@ -2153,7 +2153,7 @@ bail:
static struct class *ipath_class;
-static int init_cdev(int minor, char *name, struct file_operations *fops,
+static int init_cdev(int minor, char *name, const struct file_operations *fops,
struct cdev **cdevp, struct class_device **class_devp)
{
const dev_t dev = MKDEV(IPATH_MAJOR, minor);
@@ -2210,7 +2210,7 @@ done:
return ret;
}
-int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
+int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
struct cdev **cdevp, struct class_device **class_devp)
{
return init_cdev(minor, name, fops, cdevp, class_devp);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 79a60f020a21..5b40a846ff95 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -47,7 +47,7 @@
static struct super_block *ipath_super;
static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
- int mode, struct file_operations *fops,
+ int mode, const struct file_operations *fops,
void *data)
{
int error;
@@ -81,7 +81,7 @@ bail:
static int create_file(const char *name, mode_t mode,
struct dentry *parent, struct dentry **dentry,
- struct file_operations *fops, void *data)
+ const struct file_operations *fops, void *data)
{
int error;
@@ -105,7 +105,7 @@ static ssize_t atomic_stats_read(struct file *file, char __user *buf,
sizeof ipath_stats);
}
-static struct file_operations atomic_stats_ops = {
+static const struct file_operations atomic_stats_ops = {
.read = atomic_stats_read,
};
@@ -127,7 +127,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf,
sizeof counters);
}
-static struct file_operations atomic_counters_ops = {
+static const struct file_operations atomic_counters_ops = {
.read = atomic_counters_read,
};
@@ -166,7 +166,7 @@ static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
sizeof nodeinfo);
}
-static struct file_operations atomic_node_info_ops = {
+static const struct file_operations atomic_node_info_ops = {
.read = atomic_node_info_read,
};
@@ -291,7 +291,7 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
sizeof portinfo);
}
-static struct file_operations atomic_port_info_ops = {
+static const struct file_operations atomic_port_info_ops = {
.read = atomic_port_info_read,
};
@@ -394,7 +394,7 @@ bail:
return ret;
}
-static struct file_operations flash_ops = {
+static const struct file_operations flash_ops = {
.read = flash_read,
.write = flash_write,
};
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 986b2125b8f5..6d8d05fb5999 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -593,7 +593,7 @@ void ipath_shutdown_device(struct ipath_devdata *);
void ipath_disarm_senderrbufs(struct ipath_devdata *);
struct file_operations;
-int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
+int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
struct cdev **cdevp, struct class_device **class_devp);
void ipath_cdev_cleanup(struct cdev **cdevp,
struct class_device **class_devp);
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 46c1c89bf6ae..64f07b19349f 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -379,7 +379,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = 0;
wc.wc_flags = 0;
wc.pkey_index = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index ce6038743c5c..5ff20cb04494 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -702,7 +702,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc->vendor_err = 0;
wc->byte_len = 0;
- wc->qp_num = qp->ibqp.qp_num;
+ wc->qp = &qp->ibqp;
wc->src_qp = qp->remote_qpn;
wc->pkey_index = 0;
wc->slid = qp->remote_ah_attr.dlid;
@@ -836,7 +836,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
@@ -951,7 +951,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
@@ -1511,7 +1511,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index f7530512045d..e86cb171872e 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -137,7 +137,7 @@ bad_lkey:
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = 0;
wc.wc_flags = 0;
wc.pkey_index = 0;
@@ -336,7 +336,7 @@ again:
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = 0;
- wc.qp_num = sqp->ibqp.qp_num;
+ wc.qp = &sqp->ibqp;
wc.src_qp = sqp->remote_qpn;
wc.pkey_index = 0;
wc.slid = sqp->remote_ah_attr.dlid;
@@ -426,7 +426,7 @@ again:
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
wc.byte_len = wqe->length;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc.pkey_index = 0;
@@ -447,7 +447,7 @@ send_comp:
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.vendor_err = 0;
wc.byte_len = wqe->length;
- wc.qp_num = sqp->ibqp.qp_num;
+ wc.qp = &sqp->ibqp;
wc.src_qp = 0;
wc.pkey_index = 0;
wc.slid = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index e636cfd67a82..325d6634ff53 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -49,7 +49,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc->vendor_err = 0;
wc->byte_len = wqe->length;
- wc->qp_num = qp->ibqp.qp_num;
+ wc->qp = &qp->ibqp;
wc->src_qp = qp->remote_qpn;
wc->pkey_index = 0;
wc->slid = qp->remote_ah_attr.dlid;
@@ -411,7 +411,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.pkey_index = 0;
wc.slid = qp->remote_ah_attr.dlid;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 49f1102af8b3..9a3e54664ee4 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -66,7 +66,7 @@ bad_lkey:
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = 0;
wc.wc_flags = 0;
wc.pkey_index = 0;
@@ -255,7 +255,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
wc->status = IB_WC_SUCCESS;
wc->opcode = IB_WC_RECV;
wc->vendor_err = 0;
- wc->qp_num = qp->ibqp.qp_num;
+ wc->qp = &qp->ibqp;
wc->src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc->pkey_index = 0;
@@ -474,7 +474,7 @@ done:
wc.vendor_err = 0;
wc.opcode = IB_WC_SEND;
wc.byte_len = len;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = 0;
wc.wc_flags = 0;
/* XXX initialize other fields? */
@@ -651,7 +651,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
- wc.qp_num = qp->ibqp.qp_num;
+ wc.qp = &qp->ibqp;
wc.src_qp = src_qp;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc.pkey_index = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 768df7265b81..71314460b11e 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1051,7 +1051,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
dev_lim->max_eqs = 1 << (field & 0x7);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
- dev_lim->reserved_mtts = 1 << (field >> 4);
+ if (mthca_is_memfree(dev))
+ dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
+ MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
+ else
+ dev_lim->reserved_mtts = 1 << (field >> 4);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
dev_lim->max_mrw_sz = 1 << field;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
@@ -1854,7 +1858,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
memset(inbox + 256, 0, 256);
- MTHCA_PUT(inbox, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET);
+ MTHCA_PUT(inbox, in_wc->qp->qp_num, MAD_IFC_MY_QPN_OFFSET);
MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
val = in_wc->sl << 4;
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 1159c8a0f2c5..efd79ef109a6 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -534,7 +534,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
}
}
- entry->qp_num = (*cur_qp)->qpn;
+ entry->qp = &(*cur_qp)->ibqp;
if (is_send) {
wq = &(*cur_qp)->sq;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index fe5cecf70fed..b7e42efaf43d 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -464,6 +464,8 @@ void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
+int mthca_write_mtt_size(struct mthca_dev *dev);
+
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 44bc6cc734ab..0d9b7d06bbc2 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -379,7 +379,7 @@ static int mthca_load_fw(struct mthca_dev *mdev)
mdev->fw.arbel.fw_icm =
mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
- GFP_HIGHUSER | __GFP_NOWARN);
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
if (!mdev->fw.arbel.fw_icm) {
mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
return -ENOMEM;
@@ -412,7 +412,7 @@ err_unmap_fa:
mthca_UNMAP_FA(mdev, &status);
err_free:
- mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
return err;
}
@@ -441,7 +441,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
(unsigned long long) aux_pages << 2);
mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
- GFP_HIGHUSER | __GFP_NOWARN);
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
if (!mdev->fw.arbel.aux_icm) {
mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
return -ENOMEM;
@@ -464,10 +464,15 @@ static int mthca_init_icm(struct mthca_dev *mdev,
goto err_unmap_aux;
}
+ /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
+ mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
+ dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
+
mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
MTHCA_MTT_SEG_SIZE,
mdev->limits.num_mtt_segs,
- mdev->limits.reserved_mtts, 1);
+ mdev->limits.reserved_mtts,
+ 1, 0);
if (!mdev->mr_table.mtt_table) {
mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
err = -ENOMEM;
@@ -477,7 +482,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
dev_lim->mpt_entry_sz,
mdev->limits.num_mpts,
- mdev->limits.reserved_mrws, 1);
+ mdev->limits.reserved_mrws,
+ 1, 1);
if (!mdev->mr_table.mpt_table) {
mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
err = -ENOMEM;
@@ -487,7 +493,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
dev_lim->qpc_entry_sz,
mdev->limits.num_qps,
- mdev->limits.reserved_qps, 0);
+ mdev->limits.reserved_qps,
+ 0, 0);
if (!mdev->qp_table.qp_table) {
mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
err = -ENOMEM;
@@ -497,7 +504,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
dev_lim->eqpc_entry_sz,
mdev->limits.num_qps,
- mdev->limits.reserved_qps, 0);
+ mdev->limits.reserved_qps,
+ 0, 0);
if (!mdev->qp_table.eqp_table) {
mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
err = -ENOMEM;
@@ -507,7 +515,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base,
MTHCA_RDB_ENTRY_SIZE,
mdev->limits.num_qps <<
- mdev->qp_table.rdb_shift,
+ mdev->qp_table.rdb_shift, 0,
0, 0);
if (!mdev->qp_table.rdb_table) {
mthca_err(mdev, "Failed to map RDB context memory, aborting\n");
@@ -518,7 +526,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
dev_lim->cqc_entry_sz,
mdev->limits.num_cqs,
- mdev->limits.reserved_cqs, 0);
+ mdev->limits.reserved_cqs,
+ 0, 0);
if (!mdev->cq_table.table) {
mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
err = -ENOMEM;
@@ -530,7 +539,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
mthca_alloc_icm_table(mdev, init_hca->srqc_base,
dev_lim->srq_entry_sz,
mdev->limits.num_srqs,
- mdev->limits.reserved_srqs, 0);
+ mdev->limits.reserved_srqs,
+ 0, 0);
if (!mdev->srq_table.table) {
mthca_err(mdev, "Failed to map SRQ context memory, "
"aborting.\n");
@@ -550,7 +560,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
mdev->limits.num_amgms,
mdev->limits.num_mgms +
mdev->limits.num_amgms,
- 0);
+ 0, 0);
if (!mdev->mcg_table.table) {
mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
err = -ENOMEM;
@@ -588,7 +598,7 @@ err_unmap_aux:
mthca_UNMAP_ICM_AUX(mdev, &status);
err_free_aux:
- mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
return err;
}
@@ -609,7 +619,7 @@ static void mthca_free_icms(struct mthca_dev *mdev)
mthca_unmap_eq_icm(mdev);
mthca_UNMAP_ICM_AUX(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.aux_icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
}
static int mthca_init_arbel(struct mthca_dev *mdev)
@@ -693,7 +703,7 @@ err_free_icm:
err_stop_fw:
mthca_UNMAP_FA(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
err_disable:
if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
@@ -712,7 +722,7 @@ static void mthca_close_hca(struct mthca_dev *mdev)
mthca_free_icms(mdev);
mthca_UNMAP_FA(mdev, &status);
- mthca_free_icm(mdev, mdev->fw.arbel.fw_icm);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
mthca_DISABLE_LAM(mdev, &status);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 6b19645d946c..48f7c65e9aed 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -35,6 +35,9 @@
*/
#include <linux/mm.h>
+#include <linux/scatterlist.h>
+
+#include <asm/page.h>
#include "mthca_memfree.h"
#include "mthca_dev.h"
@@ -58,22 +61,42 @@ struct mthca_user_db_table {
} page[0];
};
-void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
+static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
+{
+ int i;
+
+ if (chunk->nsg > 0)
+ pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ for (i = 0; i < chunk->npages; ++i)
+ __free_pages(chunk->mem[i].page,
+ get_order(chunk->mem[i].length));
+}
+
+static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
- struct mthca_icm_chunk *chunk, *tmp;
int i;
+ for (i = 0; i < chunk->npages; ++i) {
+ dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
+ lowmem_page_address(chunk->mem[i].page),
+ sg_dma_address(&chunk->mem[i]));
+ }
+}
+
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
+{
+ struct mthca_icm_chunk *chunk, *tmp;
+
if (!icm)
return;
list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
- if (chunk->nsg > 0)
- pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
- PCI_DMA_BIDIRECTIONAL);
-
- for (i = 0; i < chunk->npages; ++i)
- __free_pages(chunk->mem[i].page,
- get_order(chunk->mem[i].length));
+ if (coherent)
+ mthca_free_icm_coherent(dev, chunk);
+ else
+ mthca_free_icm_pages(dev, chunk);
kfree(chunk);
}
@@ -81,12 +104,41 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
kfree(icm);
}
+static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+{
+ mem->page = alloc_pages(gfp_mask, order);
+ if (!mem->page)
+ return -ENOMEM;
+
+ mem->length = PAGE_SIZE << order;
+ mem->offset = 0;
+ return 0;
+}
+
+static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
+ int order, gfp_t gfp_mask)
+{
+ void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),
+ gfp_mask);
+ if (!buf)
+ return -ENOMEM;
+
+ sg_set_buf(mem, buf, PAGE_SIZE << order);
+ BUG_ON(mem->offset);
+ sg_dma_len(mem) = PAGE_SIZE << order;
+ return 0;
+}
+
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
- gfp_t gfp_mask)
+ gfp_t gfp_mask, int coherent)
{
struct mthca_icm *icm;
struct mthca_icm_chunk *chunk = NULL;
int cur_order;
+ int ret;
+
+ /* We use sg_set_buf for coherent allocs, which assumes low memory */
+ BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!icm)
@@ -112,21 +164,30 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
while (1 << cur_order > npages)
--cur_order;
- chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
- if (chunk->mem[chunk->npages].page) {
- chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
- chunk->mem[chunk->npages].offset = 0;
+ if (coherent)
+ ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
+ &chunk->mem[chunk->npages],
+ cur_order, gfp_mask);
+ else
+ ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
+ cur_order, gfp_mask);
- if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) {
+ if (!ret) {
+ ++chunk->npages;
+
+ if (coherent)
+ ++chunk->nsg;
+ else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
+ }
+ if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
chunk = NULL;
- }
npages -= 1 << cur_order;
} else {
@@ -136,7 +197,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
}
}
- if (chunk) {
+ if (!coherent && chunk) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
@@ -148,7 +209,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
return icm;
fail:
- mthca_free_icm(dev, icm);
+ mthca_free_icm(dev, icm, coherent);
return NULL;
}
@@ -167,7 +228,7 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
- __GFP_NOWARN);
+ __GFP_NOWARN, table->coherent);
if (!table->icm[i]) {
ret = -ENOMEM;
goto out;
@@ -175,7 +236,7 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
&status) || status) {
- mthca_free_icm(dev, table->icm[i]);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
ret = -ENOMEM;
goto out;
@@ -204,16 +265,16 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
&status);
- mthca_free_icm(dev, table->icm[i]);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
}
mutex_unlock(&table->mutex);
}
-void *mthca_table_find(struct mthca_icm_table *table, int obj)
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
{
- int idx, offset, i;
+ int idx, offset, dma_offset, i;
struct mthca_icm_chunk *chunk;
struct mthca_icm *icm;
struct page *page = NULL;
@@ -225,13 +286,22 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj)
idx = (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
- offset = idx % MTHCA_TABLE_CHUNK_SIZE;
+ dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;
if (!icm)
goto out;
list_for_each_entry(chunk, &icm->chunk_list, list) {
for (i = 0; i < chunk->npages; ++i) {
+ if (dma_handle && dma_offset >= 0) {
+ if (sg_dma_len(&chunk->mem[i]) > dma_offset)
+ *dma_handle = sg_dma_address(&chunk->mem[i]) +
+ dma_offset;
+ dma_offset -= sg_dma_len(&chunk->mem[i]);
+ }
+ /* DMA mapping can merge pages but not split them,
+ * so if we found the page, dma_handle has already
+ * been assigned to. */
if (chunk->mem[i].length > offset) {
page = chunk->mem[i].page;
goto out;
@@ -283,7 +353,7 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
- int use_lowmem)
+ int use_lowmem, int use_coherent)
{
struct mthca_icm_table *table;
int num_icm;
@@ -302,6 +372,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
table->num_obj = nobj;
table->obj_size = obj_size;
table->lowmem = use_lowmem;
+ table->coherent = use_coherent;
mutex_init(&table->mutex);
for (i = 0; i < num_icm; ++i)
@@ -314,12 +385,12 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
- __GFP_NOWARN);
+ __GFP_NOWARN, use_coherent);
if (!table->icm[i])
goto err;
if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
&status) || status) {
- mthca_free_icm(dev, table->icm[i]);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
goto err;
}
@@ -339,7 +410,7 @@ err:
mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
&status);
- mthca_free_icm(dev, table->icm[i]);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
}
kfree(table);
@@ -357,7 +428,7 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
&status);
- mthca_free_icm(dev, table->icm[i]);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
}
kfree(table);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 6d42947e1dc4..594144145f45 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -69,6 +69,7 @@ struct mthca_icm_table {
int num_obj;
int obj_size;
int lowmem;
+ int coherent;
struct mutex mutex;
struct mthca_icm *icm[0];
};
@@ -82,17 +83,17 @@ struct mthca_icm_iter {
struct mthca_dev;
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
- gfp_t gfp_mask);
-void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
+ gfp_t gfp_mask, int coherent);
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent);
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
u64 virt, int obj_size,
int nobj, int reserved,
- int use_lowmem);
+ int use_lowmem, int use_coherent);
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
-void *mthca_table_find(struct mthca_icm_table *table, int obj);
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle);
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
int start, int end);
void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index f71ffa88db3a..6037dd3f87df 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -243,8 +243,8 @@ void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
kfree(mtt);
}
-int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
- int start_index, u64 *buffer_list, int list_len)
+static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
{
struct mthca_mailbox *mailbox;
__be64 *mtt_entry;
@@ -295,6 +295,84 @@ out:
return err;
}
+int mthca_write_mtt_size(struct mthca_dev *dev)
+{
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+ /*
+ * Be friendly to WRITE_MTT command
+ * and leave two empty slots for the
+ * index and reserved fields of the
+ * mailbox.
+ */
+ return PAGE_SIZE / sizeof (u64) - 2;
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
+}
+
+void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ u64 __iomem *mtts;
+ int i;
+
+ mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+ start_index * sizeof (u64);
+ for (i = 0; i < list_len; ++i)
+ mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
+ mtts + i);
+}
+
+void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ __be64 *mtts;
+ dma_addr_t dma_handle;
+ int i;
+ int s = start_index * sizeof (u64);
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
+ /* Require full segments */
+ BUG_ON(s % MTHCA_MTT_SEG_SIZE);
+
+ mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
+ s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+
+ BUG_ON(!mtts);
+
+ for (i = 0; i < list_len; ++i)
+ mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
+
+ dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof (u64), DMA_TO_DEVICE);
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ int size = mthca_write_mtt_size(dev);
+ int chunk;
+
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
+ return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
+
+ while (list_len > 0) {
+ chunk = min(size, list_len);
+ if (mthca_is_memfree(dev))
+ mthca_arbel_write_mtt_seg(dev, mtt, start_index,
+ buffer_list, chunk);
+ else
+ mthca_tavor_write_mtt_seg(dev, mtt, start_index,
+ buffer_list, chunk);
+
+ list_len -= chunk;
+ start_index += chunk;
+ buffer_list += chunk;
+ }
+
+ return 0;
+}
+
static inline u32 tavor_hw_index_to_key(u32 ind)
{
return ind;
@@ -524,7 +602,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
if (err)
goto err_out_mpt_free;
- mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
+ mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
BUG_ON(!mr->mem.arbel.mpt);
} else
mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
@@ -538,7 +616,8 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
- mr->mtt->first_seg);
+ mr->mtt->first_seg,
+ &mr->mem.arbel.dma_handle);
BUG_ON(!mr->mem.arbel.mtts);
} else
mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
@@ -712,6 +791,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
MTHCA_MTT_FLAG_PRESENT);
+ dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
+ list_len * sizeof(u64), DMA_TO_DEVICE);
+
fmr->mem.arbel.mpt->key = cpu_to_be32(key);
fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
@@ -761,7 +843,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
int mthca_init_mr_table(struct mthca_dev *dev)
{
unsigned long addr;
- int err, i;
+ int mpts, mtts, err, i;
err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
dev->limits.num_mpts,
@@ -795,13 +877,21 @@ int mthca_init_mr_table(struct mthca_dev *dev)
err = -EINVAL;
goto err_fmr_mpt;
}
+ mpts = mtts = 1 << i;
+ } else {
+ mpts = dev->limits.num_mtt_segs;
+ mtts = dev->limits.num_mpts;
+ }
+
+ if (!mthca_is_memfree(dev) &&
+ (dev->mthca_flags & MTHCA_FLAG_FMR)) {
addr = pci_resource_start(dev->pdev, 4) +
((pci_resource_len(dev->pdev, 4) - 1) &
dev->mr_table.mpt_base);
dev->mr_table.tavor_fmr.mpt_base =
- ioremap(addr, (1 << i) * sizeof(struct mthca_mpt_entry));
+ ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
if (!dev->mr_table.tavor_fmr.mpt_base) {
mthca_warn(dev, "MPT ioremap for FMR failed.\n");
@@ -814,19 +904,21 @@ int mthca_init_mr_table(struct mthca_dev *dev)
dev->mr_table.mtt_base);
dev->mr_table.tavor_fmr.mtt_base =
- ioremap(addr, (1 << i) * MTHCA_MTT_SEG_SIZE);
+ ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
if (!dev->mr_table.tavor_fmr.mtt_base) {
mthca_warn(dev, "MTT ioremap for FMR failed.\n");
err = -ENOMEM;
goto err_fmr_mtt;
}
+ }
- err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
+ if (dev->limits.fmr_reserved_mtts) {
+ err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
if (err)
goto err_fmr_mtt_buddy;
/* Prevent regular MRs from using FMR keys */
- err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
+ err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
if (err)
goto err_reserve_fmr;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 58d44aa3c302..26bf86d1cfcd 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -277,7 +277,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
* out of the MR pool. They don't use additional memory, but
* we assign them as part of the HCA profile anyway.
*/
- if (mthca_is_memfree(dev))
+ if (mthca_is_memfree(dev) || BITS_PER_LONG == 64)
dev->limits.fmr_reserved_mtts = 0;
else
dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 7b96751695ea..0725ad7ad9bf 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1015,6 +1015,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int shift, n, len;
int i, j, k;
int err = 0;
+ int write_mtt_size;
shift = ffs(region->page_size) - 1;
@@ -1040,6 +1041,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
i = n = 0;
+ write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
+
list_for_each_entry(chunk, &region->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -1047,14 +1050,11 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
region->page_size * k;
/*
- * Be friendly to WRITE_MTT command
- * and leave two empty slots for the
- * index and reserved fields of the
- * mailbox.
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
*/
- if (i == PAGE_SIZE / sizeof (u64) - 2) {
- err = mthca_write_mtt(dev, mr->mtt,
- n, pages, i);
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
if (err)
goto mtt_done;
n += i;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 9a5bece3fa5c..1d266ac2e094 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -89,6 +89,7 @@ struct mthca_fmr {
struct {
struct mthca_mpt_entry *mpt;
__be64 *mtts;
+ dma_addr_t dma_handle;
} arbel;
} mem;
};
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 5f5214c0337d..71dc84bd4254 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -399,7 +399,7 @@ static int to_ib_qp_access_flags(int mthca_flags)
static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
struct mthca_qp_path *path)
{
- memset(ib_ah_attr, 0, sizeof *path);
+ memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
@@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
goto out;
}
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
+ }
+
if ((attr_mask & IB_QP_PKEY_INDEX) &&
attr->pkey_index >= dev->limits.pkey_table_len) {
mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 10684da33d58..61974b0296ca 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -116,11 +116,16 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
struct mthca_srq *srq,
struct mthca_arbel_srq_context *context)
{
- int logsize;
+ int logsize, max;
memset(context, 0, sizeof *context);
- logsize = ilog2(srq->max);
+ /*
+ * Put max in a temporary variable to work around gcc bug
+ * triggered by ilog2() on sparc64.
+ */
+ max = srq->max;
+ logsize = ilog2(max);
context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
context->db_index = cpu_to_be32(srq->db_index);
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index c75322d820d4..af78ccc4ce71 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_IPOIB
tristate "IP-over-InfiniBand"
- depends on INFINIBAND && NETDEVICES && INET
+ depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n)
---help---
Support for the IP-over-InfiniBand protocol (IPoIB). This
transports IP packets over InfiniBand so you can use your IB
@@ -8,6 +8,20 @@ config INFINIBAND_IPOIB
See Documentation/infiniband/ipoib.txt for more information
+config INFINIBAND_IPOIB_CM
+ bool "IP-over-InfiniBand Connected Mode support"
+ depends on INFINIBAND_IPOIB && EXPERIMENTAL
+ default n
+ ---help---
+ This option enables experimental support for IPoIB connected mode.
+ After enabling this option, you need to switch to connected mode through
+ /sys/class/net/ibXXX/mode to actually create connections, and then increase
+ the interface MTU with e.g. ifconfig ib0 mtu 65520.
+
+ WARNING: Enabling connected mode will trigger some
+ packet drops for multicast and UD mode traffic from this interface,
+ unless you limit mtu for these destinations to 2044.
+
config INFINIBAND_IPOIB_DEBUG
bool "IP-over-InfiniBand debugging" if EMBEDDED
depends on INFINIBAND_IPOIB
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
index 8935e74ae3f8..98ee38e8c2c4 100644
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -5,5 +5,6 @@ ib_ipoib-y := ipoib_main.o \
ipoib_multicast.o \
ipoib_verbs.o \
ipoib_vlan.o
+ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 07deee8f81ce..2594db2030b3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -62,6 +62,10 @@ enum {
IPOIB_ENCAP_LEN = 4,
+ IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
+ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
+ IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
+ IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
IPOIB_RX_RING_SIZE = 128,
IPOIB_TX_RING_SIZE = 64,
IPOIB_MAX_QUEUE_SIZE = 8192,
@@ -81,6 +85,8 @@ enum {
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
IPOIB_MCAST_STARTED = 8,
+ IPOIB_FLAG_NETIF_STOPPED = 9,
+ IPOIB_FLAG_ADMIN_CM = 10,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -90,6 +96,13 @@ enum {
IPOIB_MCAST_FLAG_ATTACHED = 3,
};
+#define IPOIB_OP_RECV (1ul << 31)
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+#define IPOIB_CM_OP_SRQ (1ul << 30)
+#else
+#define IPOIB_CM_OP_SRQ (0)
+#endif
+
/* structs */
struct ipoib_header {
@@ -113,6 +126,59 @@ struct ipoib_tx_buf {
u64 mapping;
};
+struct ib_cm_id;
+
+struct ipoib_cm_data {
+ __be32 qpn; /* High byte MUST be ignored on receive */
+ __be32 mtu;
+};
+
+struct ipoib_cm_rx {
+ struct ib_cm_id *id;
+ struct ib_qp *qp;
+ struct list_head list;
+ struct net_device *dev;
+ unsigned long jiffies;
+};
+
+struct ipoib_cm_tx {
+ struct ib_cm_id *id;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ struct list_head list;
+ struct net_device *dev;
+ struct ipoib_neigh *neigh;
+ struct ipoib_path *path;
+ struct ipoib_tx_buf *tx_ring;
+ unsigned tx_head;
+ unsigned tx_tail;
+ unsigned long flags;
+ u32 mtu;
+ struct ib_wc ibwc[IPOIB_NUM_WC];
+};
+
+struct ipoib_cm_rx_buf {
+ struct sk_buff *skb;
+ u64 mapping[IPOIB_CM_RX_SG];
+};
+
+struct ipoib_cm_dev_priv {
+ struct ib_srq *srq;
+ struct ipoib_cm_rx_buf *srq_ring;
+ struct ib_cm_id *id;
+ struct list_head passive_ids;
+ struct work_struct start_task;
+ struct work_struct reap_task;
+ struct work_struct skb_task;
+ struct delayed_work stale_task;
+ struct sk_buff_head skb_queue;
+ struct list_head start_list;
+ struct list_head reap_list;
+ struct ib_wc ibwc[IPOIB_NUM_WC];
+ struct ib_sge rx_sge[IPOIB_CM_RX_SG];
+ struct ib_recv_wr rx_wr;
+};
+
/*
* Device private locking: tx_lock protects members used in TX fast
* path (and we use LLTX so upper layers don't do extra locking).
@@ -179,6 +245,10 @@ struct ipoib_dev_priv {
struct list_head child_intfs;
struct list_head list;
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+ struct ipoib_cm_dev_priv cm;
+#endif
+
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
struct list_head fs_list;
struct dentry *mcg_dentry;
@@ -212,6 +282,9 @@ struct ipoib_path {
struct ipoib_neigh {
struct ipoib_ah *ah;
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+ struct ipoib_cm_tx *cm;
+#endif
union ib_gid dgid;
struct sk_buff_head queue;
@@ -315,6 +388,146 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
void ipoib_pkey_poll(struct work_struct *work);
int ipoib_pkey_dev_delay_open(struct net_device *dev);
+#ifdef CONFIG_INFINIBAND_IPOIB_CM
+
+#define IPOIB_FLAGS_RC 0x80
+#define IPOIB_FLAGS_UC 0x40
+
+/* We don't support UC connections at the moment */
+#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
+
+static inline int ipoib_cm_admin_enabled(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
+ test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ return IPOIB_CM_SUPPORTED(n->ha) &&
+ test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
+
+{
+ return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
+}
+
+static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
+{
+ return neigh->cm;
+}
+
+static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
+{
+ neigh->cm = tx;
+}
+
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
+int ipoib_cm_dev_open(struct net_device *dev);
+void ipoib_cm_dev_stop(struct net_device *dev);
+int ipoib_cm_dev_init(struct net_device *dev);
+int ipoib_cm_add_mode_attr(struct net_device *dev);
+void ipoib_cm_dev_cleanup(struct net_device *dev);
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+ struct ipoib_neigh *neigh);
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
+void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
+ unsigned int mtu);
+void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+#else
+
+struct ipoib_cm_tx;
+
+static inline int ipoib_cm_admin_enabled(struct net_device *dev)
+{
+ return 0;
+}
+static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
+
+{
+ return 0;
+}
+
+static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
+
+{
+ return 0;
+}
+
+static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
+{
+ return NULL;
+}
+
+static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
+{
+}
+
+static inline
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
+{
+ return;
+}
+
+static inline
+int ipoib_cm_dev_open(struct net_device *dev)
+{
+ return 0;
+}
+
+static inline
+void ipoib_cm_dev_stop(struct net_device *dev)
+{
+ return;
+}
+
+static inline
+int ipoib_cm_dev_init(struct net_device *dev)
+{
+ return -ENOSYS;
+}
+
+static inline
+void ipoib_cm_dev_cleanup(struct net_device *dev)
+{
+ return;
+}
+
+static inline
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+ struct ipoib_neigh *neigh)
+{
+ return NULL;
+}
+
+static inline
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
+{
+ return;
+}
+
+static inline
+int ipoib_cm_add_mode_attr(struct net_device *dev)
+{
+ return 0;
+}
+
+static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
+ unsigned int mtu)
+{
+ dev_kfree_skb_any(skb);
+}
+
+static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+}
+
+#endif
+
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
void ipoib_create_debug_files(struct net_device *dev);
void ipoib_delete_debug_files(struct net_device *dev);
@@ -392,4 +605,6 @@ extern int ipoib_debug_level;
#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw)
+#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
+
#endif /* _IPOIB_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
new file mode 100644
index 000000000000..4d59682f7d4a
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -0,0 +1,1237 @@
+/*
+ * Copyright (c) 2006 Mellanox Technologies. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include <rdma/ib_cm.h>
+#include <rdma/ib_cache.h>
+#include <net/dst.h>
+#include <net/icmp.h>
+#include <linux/icmpv6.h>
+
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
+static int data_debug_level;
+
+module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
+MODULE_PARM_DESC(cm_data_debug_level,
+ "Enable data path debug tracing for connected mode if > 0");
+#endif
+
+#include "ipoib.h"
+
+#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
+
+#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
+#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ)
+#define IPOIB_CM_RX_DELAY (3 * 256 * HZ)
+#define IPOIB_CM_RX_UPDATE_MASK (0x3)
+
+struct ipoib_cm_id {
+ struct ib_cm_id *id;
+ int flags;
+ u32 remote_qpn;
+ u32 remote_mtu;
+};
+
+static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
+ struct ib_cm_event *event);
+
+static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv,
+ u64 mapping[IPOIB_CM_RX_SG])
+{
+ int i;
+
+ ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+ for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i)
+ ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+}
+
+static int ipoib_cm_post_receive(struct net_device *dev, int id)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_recv_wr *bad_wr;
+ int i, ret;
+
+ priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
+
+ for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+ priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+
+ ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+ ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping);
+ dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+ priv->cm.srq_ring[id].skb = NULL;
+ }
+
+ return ret;
+}
+
+static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
+ u64 mapping[IPOIB_CM_RX_SG])
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct sk_buff *skb;
+ int i;
+
+ skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ /*
+ * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
+ * IP header to a multiple of 16.
+ */
+ skb_reserve(skb, 12);
+
+ mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
+ DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
+ dev_kfree_skb_any(skb);
+ return -EIO;
+ }
+
+ for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) {
+ struct page *page = alloc_page(GFP_ATOMIC);
+
+ if (!page)
+ goto partial_error;
+ skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
+
+ mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
+ goto partial_error;
+ }
+
+ priv->cm.srq_ring[id].skb = skb;
+ return 0;
+
+partial_error:
+
+ ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
+
+ for (; i >= 0; --i)
+ ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+
+ dev_kfree_skb_any(skb);
+ return -ENOMEM;
+}
+
+static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
+ struct ipoib_cm_rx *p)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_init_attr attr = {
+ .send_cq = priv->cq, /* does not matter, we never send anything */
+ .recv_cq = priv->cq,
+ .srq = priv->cm.srq,
+ .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
+ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
+ .sq_sig_type = IB_SIGNAL_ALL_WR,
+ .qp_type = IB_QPT_RC,
+ .qp_context = p,
+ };
+ return ib_create_qp(priv->pd, &attr);
+}
+
+static int ipoib_cm_modify_rx_qp(struct net_device *dev,
+ struct ib_cm_id *cm_id, struct ib_qp *qp,
+ unsigned psn)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
+ return ret;
+ }
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
+ return ret;
+ }
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+ return ret;
+ }
+ qp_attr.rq_psn = psn;
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
+ struct ib_qp *qp, struct ib_cm_req_event_param *req,
+ unsigned psn)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_cm_data data = {};
+ struct ib_cm_rep_param rep = {};
+
+ data.qpn = cpu_to_be32(priv->qp->qp_num);
+ data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
+
+ rep.private_data = &data;
+ rep.private_data_len = sizeof data;
+ rep.flow_control = 0;
+ rep.rnr_retry_count = req->rnr_retry_count;
+ rep.target_ack_delay = 20; /* FIXME */
+ rep.srq = 1;
+ rep.qp_num = qp->qp_num;
+ rep.starting_psn = psn;
+ return ib_send_cm_rep(cm_id, &rep);
+}
+
+static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct net_device *dev = cm_id->context;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_cm_rx *p;
+ unsigned long flags;
+ unsigned psn;
+ int ret;
+
+ ipoib_dbg(priv, "REQ arrived\n");
+ p = kzalloc(sizeof *p, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ p->dev = dev;
+ p->id = cm_id;
+ p->qp = ipoib_cm_create_rx_qp(dev, p);
+ if (IS_ERR(p->qp)) {
+ ret = PTR_ERR(p->qp);
+ goto err_qp;
+ }
+
+ psn = random32() & 0xffffff;
+ ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+ if (ret)
+ goto err_modify;
+
+ ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
+ if (ret) {
+ ipoib_warn(priv, "failed to send REP: %d\n", ret);
+ goto err_rep;
+ }
+
+ cm_id->context = p;
+ p->jiffies = jiffies;
+ spin_lock_irqsave(&priv->lock, flags);
+ list_add(&p->list, &priv->cm.passive_ids);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ queue_delayed_work(ipoib_workqueue,
+ &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+ return 0;
+
+err_rep:
+err_modify:
+ ib_destroy_qp(p->qp);
+err_qp:
+ kfree(p);
+ return ret;
+}
+
+static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
+ struct ib_cm_event *event)
+{
+ struct ipoib_cm_rx *p;
+ struct ipoib_dev_priv *priv;
+ unsigned long flags;
+ int ret;
+
+ switch (event->event) {
+ case IB_CM_REQ_RECEIVED:
+ return ipoib_cm_req_handler(cm_id, event);
+ case IB_CM_DREQ_RECEIVED:
+ p = cm_id->context;
+ ib_send_cm_drep(cm_id, NULL, 0);
+ /* Fall through */
+ case IB_CM_REJ_RECEIVED:
+ p = cm_id->context;
+ priv = netdev_priv(p->dev);
+ spin_lock_irqsave(&priv->lock, flags);
+ if (list_empty(&p->list))
+ ret = 0; /* Connection is going away already. */
+ else {
+ list_del_init(&p->list);
+ ret = -ECONNRESET;
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+ if (ret) {
+ ib_destroy_qp(p->qp);
+ kfree(p);
+ return ret;
+ }
+ return 0;
+ default:
+ return 0;
+ }
+}
+/* Adjust length of skb with fragments to match received data */
+static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
+ unsigned int length)
+{
+ int i, num_frags;
+ unsigned int size;
+
+ /* put header into skb */
+ size = min(length, hdr_space);
+ skb->tail += size;
+ skb->len += size;
+ length -= size;
+
+ num_frags = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ if (length == 0) {
+ /* don't need this page */
+ __free_page(frag->page);
+ --skb_shinfo(skb)->nr_frags;
+ } else {
+ size = min(length, (unsigned) PAGE_SIZE);
+
+ frag->size = size;
+ skb->data_len += size;
+ skb->truesize += size;
+ skb->len += size;
+ length -= size;
+ }
+ }
+}
+
+void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
+ struct sk_buff *skb;
+ struct ipoib_cm_rx *p;
+ unsigned long flags;
+ u64 mapping[IPOIB_CM_RX_SG];
+
+ ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
+ wr_id, wc->opcode, wc->status);
+
+ if (unlikely(wr_id >= ipoib_recvq_size)) {
+ ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
+ wr_id, ipoib_recvq_size);
+ return;
+ }
+
+ skb = priv->cm.srq_ring[wr_id].skb;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ ipoib_dbg(priv, "cm recv error "
+ "(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+ ++priv->stats.rx_dropped;
+ goto repost;
+ }
+
+ if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
+ p = wc->qp->qp_context;
+ if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
+ spin_lock_irqsave(&priv->lock, flags);
+ p->jiffies = jiffies;
+ /* Move this entry to list head, but do
+ * not re-add it if it has been removed. */
+ if (!list_empty(&p->list))
+ list_move(&p->list, &priv->cm.passive_ids);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ queue_delayed_work(ipoib_workqueue,
+ &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
+ }
+ }
+
+ if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) {
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
+ ++priv->stats.rx_dropped;
+ goto repost;
+ }
+
+ ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping);
+ memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping);
+
+ ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+ wc->byte_len, wc->slid);
+
+ skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len);
+
+ skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+ skb->mac.raw = skb->data;
+ skb_pull(skb, IPOIB_ENCAP_LEN);
+
+ dev->last_rx = jiffies;
+ ++priv->stats.rx_packets;
+ priv->stats.rx_bytes += skb->len;
+
+ skb->dev = dev;
+ /* XXX get correct PACKET_ type here */
+ skb->pkt_type = PACKET_HOST;
+ netif_rx_ni(skb);
+
+repost:
+ if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
+ ipoib_warn(priv, "ipoib_cm_post_receive failed "
+ "for buf %d\n", wr_id);
+}
+
+static inline int post_send(struct ipoib_dev_priv *priv,
+ struct ipoib_cm_tx *tx,
+ unsigned int wr_id,
+ u64 addr, int len)
+{
+ struct ib_send_wr *bad_wr;
+
+ priv->tx_sge.addr = addr;
+ priv->tx_sge.length = len;
+
+ priv->tx_wr.wr_id = wr_id;
+
+ return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+}
+
+void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_tx_buf *tx_req;
+ u64 addr;
+
+ if (unlikely(skb->len > tx->mtu)) {
+ ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
+ skb->len, tx->mtu);
+ ++priv->stats.tx_dropped;
+ ++priv->stats.tx_errors;
+ ipoib_cm_skb_too_long(dev, skb, tx->mtu - INFINIBAND_ALEN);
+ return;
+ }
+
+ ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
+ tx->tx_head, skb->len, tx->qp->qp_num);
+
+ /*
+ * We put the skb into the tx_ring _before_ we call post_send()
+ * because it's entirely possible that the completion handler will
+ * run before we execute anything after the post_send(). That
+ * means we have to make sure everything is properly recorded and
+ * our state is consistent before we call post_send().
+ */
+ tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
+ tx_req->skb = skb;
+ addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+ ++priv->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ tx_req->mapping = addr;
+
+ if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
+ addr, skb->len))) {
+ ipoib_warn(priv, "post_send failed\n");
+ ++priv->stats.tx_errors;
+ ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ } else {
+ dev->trans_start = jiffies;
+ ++tx->tx_head;
+
+ if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
+ ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
+ tx->qp->qp_num);
+ netif_stop_queue(dev);
+ set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+ }
+ }
+}
+
+static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx,
+ struct ib_wc *wc)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ unsigned int wr_id = wc->wr_id;
+ struct ipoib_tx_buf *tx_req;
+ unsigned long flags;
+
+ ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n",
+ wr_id, wc->opcode, wc->status);
+
+ if (unlikely(wr_id >= ipoib_sendq_size)) {
+ ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
+ wr_id, ipoib_sendq_size);
+ return;
+ }
+
+ tx_req = &tx->tx_ring[wr_id];
+
+ ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+
+ /* FIXME: is this right? Shouldn't we only increment on success? */
+ ++priv->stats.tx_packets;
+ priv->stats.tx_bytes += tx_req->skb->len;
+
+ dev_kfree_skb_any(tx_req->skb);
+
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ ++tx->tx_tail;
+ if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
+ tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
+ clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
+ netif_wake_queue(dev);
+ }
+
+ if (wc->status != IB_WC_SUCCESS &&
+ wc->status != IB_WC_WR_FLUSH_ERR) {
+ struct ipoib_neigh *neigh;
+
+ ipoib_dbg(priv, "failed cm send event "
+ "(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+
+ spin_lock(&priv->lock);
+ neigh = tx->neigh;
+
+ if (neigh) {
+ neigh->cm = NULL;
+ list_del(&neigh->list);
+ if (neigh->ah)
+ ipoib_put_ah(neigh->ah);
+ ipoib_neigh_free(dev, neigh);
+
+ tx->neigh = NULL;
+ }
+
+ /* queue would be re-started anyway when TX is destroyed,
+ * but it makes sense to do it ASAP here. */
+ if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
+ netif_wake_queue(dev);
+
+ if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+ list_move(&tx->list, &priv->cm.reap_list);
+ queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ }
+
+ clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
+
+ spin_unlock(&priv->lock);
+ }
+
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
+{
+ struct ipoib_cm_tx *tx = tx_ptr;
+ int n, i;
+
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ do {
+ n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
+ for (i = 0; i < n; ++i)
+ ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
+ } while (n == IPOIB_NUM_WC);
+}
+
+int ipoib_cm_dev_open(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int ret;
+
+ if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
+ return 0;
+
+ priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
+ if (IS_ERR(priv->cm.id)) {
+ printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
+ return IS_ERR(priv->cm.id);
+ }
+
+ ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
+ 0, NULL);
+ if (ret) {
+ printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
+ IPOIB_CM_IETF_ID | priv->qp->qp_num);
+ ib_destroy_cm_id(priv->cm.id);
+ return ret;
+ }
+ return 0;
+}
+
+void ipoib_cm_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_cm_rx *p;
+ unsigned long flags;
+
+ if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
+ return;
+
+ ib_destroy_cm_id(priv->cm.id);
+ spin_lock_irqsave(&priv->lock, flags);
+ while (!list_empty(&priv->cm.passive_ids)) {
+ p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
+ list_del_init(&p->list);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ ib_destroy_cm_id(p->id);
+ ib_destroy_qp(p->qp);
+ kfree(p);
+ spin_lock_irqsave(&priv->lock, flags);
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ cancel_delayed_work(&priv->cm.stale_task);
+}
+
+static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct ipoib_cm_tx *p = cm_id->context;
+ struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_cm_data *data = event->private_data;
+ struct sk_buff_head skqueue;
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ p->mtu = be32_to_cpu(data->mtu);
+
+ if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) {
+ ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n",
+ p->mtu, priv->dev->mtu);
+ return -EINVAL;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+ return ret;
+ }
+
+ qp_attr.rq_psn = 0 /* FIXME */;
+ ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+ return ret;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+ return ret;
+ }
+ ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+ return ret;
+ }
+
+ skb_queue_head_init(&skqueue);
+
+ spin_lock_irqsave(&priv->lock, flags);
+ set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
+ if (p->neigh)
+ while ((skb = __skb_dequeue(&p->neigh->queue)))
+ __skb_queue_tail(&skqueue, skb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ while ((skb = __skb_dequeue(&skqueue))) {
+ skb->dev = p->dev;
+ if (dev_queue_xmit(skb))
+ ipoib_warn(priv, "dev_queue_xmit failed "
+ "to requeue packet\n");
+ }
+
+ ret = ib_send_cm_rtu(cm_id, NULL, 0);
+ if (ret) {
+ ipoib_warn(priv, "failed to send RTU: %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_init_attr attr = {};
+ attr.recv_cq = priv->cq;
+ attr.srq = priv->cm.srq;
+ attr.cap.max_send_wr = ipoib_sendq_size;
+ attr.cap.max_send_sge = 1;
+ attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ attr.qp_type = IB_QPT_RC;
+ attr.send_cq = cq;
+ return ib_create_qp(priv->pd, &attr);
+}
+
+static int ipoib_cm_send_req(struct net_device *dev,
+ struct ib_cm_id *id, struct ib_qp *qp,
+ u32 qpn,
+ struct ib_sa_path_rec *pathrec)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_cm_data data = {};
+ struct ib_cm_req_param req = {};
+
+ data.qpn = cpu_to_be32(priv->qp->qp_num);
+ data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
+
+ req.primary_path = pathrec;
+ req.alternate_path = NULL;
+ req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
+ req.qp_num = qp->qp_num;
+ req.qp_type = qp->qp_type;
+ req.private_data = &data;
+ req.private_data_len = sizeof data;
+ req.flow_control = 0;
+
+ req.starting_psn = 0; /* FIXME */
+
+ /*
+ * Pick some arbitrary defaults here; we could make these
+ * module parameters if anyone cared about setting them.
+ */
+ req.responder_resources = 4;
+ req.remote_cm_response_timeout = 20;
+ req.local_cm_response_timeout = 20;
+ req.retry_count = 0; /* RFC draft warns against retries */
+ req.rnr_retry_count = 0; /* RFC draft warns against retries */
+ req.max_cm_retries = 15;
+ req.srq = 1;
+ return ib_send_cm_req(id, &req);
+}
+
+static int ipoib_cm_modify_tx_init(struct net_device *dev,
+ struct ib_cm_id *cm_id, struct ib_qp *qp)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+ ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
+ if (ret) {
+ ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret);
+ return ret;
+ }
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ qp_attr.port_num = priv->port;
+ qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
+ struct ib_sa_path_rec *pathrec)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ int ret;
+
+ p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+ GFP_KERNEL);
+ if (!p->tx_ring) {
+ ipoib_warn(priv, "failed to allocate tx ring\n");
+ ret = -ENOMEM;
+ goto err_tx;
+ }
+
+ p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
+ ipoib_sendq_size + 1);
+ if (IS_ERR(p->cq)) {
+ ret = PTR_ERR(p->cq);
+ ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
+ goto err_cq;
+ }
+
+ ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
+ if (ret) {
+ ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
+ goto err_req_notify;
+ }
+
+ p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
+ if (IS_ERR(p->qp)) {
+ ret = PTR_ERR(p->qp);
+ ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
+ goto err_qp;
+ }
+
+ p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
+ if (IS_ERR(p->id)) {
+ ret = PTR_ERR(p->id);
+ ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
+ goto err_id;
+ }
+
+ ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
+ goto err_modify;
+ }
+
+ ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
+ if (ret) {
+ ipoib_warn(priv, "failed to send cm req: %d\n", ret);
+ goto err_send_cm;
+ }
+
+ ipoib_dbg(priv, "Request connection 0x%x for gid " IPOIB_GID_FMT " qpn 0x%x\n",
+ p->qp->qp_num, IPOIB_GID_ARG(pathrec->dgid), qpn);
+
+ return 0;
+
+err_send_cm:
+err_modify:
+ ib_destroy_cm_id(p->id);
+err_id:
+ p->id = NULL;
+ ib_destroy_qp(p->qp);
+err_req_notify:
+err_qp:
+ p->qp = NULL;
+ ib_destroy_cq(p->cq);
+err_cq:
+ p->cq = NULL;
+err_tx:
+ return ret;
+}
+
+static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ struct ipoib_tx_buf *tx_req;
+
+ ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
+ p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
+
+ if (p->id)
+ ib_destroy_cm_id(p->id);
+
+ if (p->qp)
+ ib_destroy_qp(p->qp);
+
+ if (p->cq)
+ ib_destroy_cq(p->cq);
+
+ if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
+ netif_wake_queue(p->dev);
+
+ if (p->tx_ring) {
+ while ((int) p->tx_tail - (int) p->tx_head < 0) {
+ tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
+ ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+ DMA_TO_DEVICE);
+ dev_kfree_skb_any(tx_req->skb);
+ ++p->tx_tail;
+ }
+
+ kfree(p->tx_ring);
+ }
+
+ kfree(p);
+}
+
+static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
+ struct ib_cm_event *event)
+{
+ struct ipoib_cm_tx *tx = cm_id->context;
+ struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ struct net_device *dev = priv->dev;
+ struct ipoib_neigh *neigh;
+ unsigned long flags;
+ int ret;
+
+ switch (event->event) {
+ case IB_CM_DREQ_RECEIVED:
+ ipoib_dbg(priv, "DREQ received.\n");
+ ib_send_cm_drep(cm_id, NULL, 0);
+ break;
+ case IB_CM_REP_RECEIVED:
+ ipoib_dbg(priv, "REP received.\n");
+ ret = ipoib_cm_rep_handler(cm_id, event);
+ if (ret)
+ ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+ NULL, 0, NULL, 0);
+ break;
+ case IB_CM_REQ_ERROR:
+ case IB_CM_REJ_RECEIVED:
+ case IB_CM_TIMEWAIT_EXIT:
+ ipoib_dbg(priv, "CM error %d.\n", event->event);
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock(&priv->lock);
+ neigh = tx->neigh;
+
+ if (neigh) {
+ neigh->cm = NULL;
+ list_del(&neigh->list);
+ if (neigh->ah)
+ ipoib_put_ah(neigh->ah);
+ ipoib_neigh_free(dev, neigh);
+
+ tx->neigh = NULL;
+ }
+
+ if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+ list_move(&tx->list, &priv->cm.reap_list);
+ queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ }
+
+ spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
+ struct ipoib_neigh *neigh)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_cm_tx *tx;
+
+ tx = kzalloc(sizeof *tx, GFP_ATOMIC);
+ if (!tx)
+ return NULL;
+
+ neigh->cm = tx;
+ tx->neigh = neigh;
+ tx->path = path;
+ tx->dev = dev;
+ list_add(&tx->list, &priv->cm.start_list);
+ set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
+ queue_work(ipoib_workqueue, &priv->cm.start_task);
+ return tx;
+}
+
+void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
+ if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
+ list_move(&tx->list, &priv->cm.reap_list);
+ queue_work(ipoib_workqueue, &priv->cm.reap_task);
+ ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n",
+ IPOIB_GID_ARG(tx->neigh->dgid));
+ tx->neigh = NULL;
+ }
+}
+
+static void ipoib_cm_tx_start(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ cm.start_task);
+ struct net_device *dev = priv->dev;
+ struct ipoib_neigh *neigh;
+ struct ipoib_cm_tx *p;
+ unsigned long flags;
+ int ret;
+
+ struct ib_sa_path_rec pathrec;
+ u32 qpn;
+
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock(&priv->lock);
+ while (!list_empty(&priv->cm.start_list)) {
+ p = list_entry(priv->cm.start_list.next, typeof(*p), list);
+ list_del_init(&p->list);
+ neigh = p->neigh;
+ qpn = IPOIB_QPN(neigh->neighbour->ha);
+ memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
+ spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+ ret = ipoib_cm_tx_init(p, qpn, &pathrec);
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock(&priv->lock);
+ if (ret) {
+ neigh = p->neigh;
+ if (neigh) {
+ neigh->cm = NULL;
+ list_del(&neigh->list);
+ if (neigh->ah)
+ ipoib_put_ah(neigh->ah);
+ ipoib_neigh_free(dev, neigh);
+ }
+ list_del(&p->list);
+ kfree(p);
+ }
+ }
+ spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_tx_reap(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ cm.reap_task);
+ struct ipoib_cm_tx *p;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock(&priv->lock);
+ while (!list_empty(&priv->cm.reap_list)) {
+ p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
+ list_del(&p->list);
+ spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+ ipoib_cm_tx_destroy(p);
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock(&priv->lock);
+ }
+ spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+static void ipoib_cm_skb_reap(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ cm.skb_task);
+ struct net_device *dev = priv->dev;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ unsigned mtu = priv->mcast_mtu;
+
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock(&priv->lock);
+ while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
+ spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+ if (skb->protocol == htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+#endif
+ dev_kfree_skb_any(skb);
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock(&priv->lock);
+ }
+ spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+}
+
+void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
+ unsigned int mtu)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int e = skb_queue_empty(&priv->cm.skb_queue);
+
+ if (skb->dst)
+ skb->dst->ops->update_pmtu(skb->dst, mtu);
+
+ skb_queue_tail(&priv->cm.skb_queue, skb);
+ if (e)
+ queue_work(ipoib_workqueue, &priv->cm.skb_task);
+}
+
+static void ipoib_cm_stale_task(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ cm.stale_task.work);
+ struct ipoib_cm_rx *p;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ while (!list_empty(&priv->cm.passive_ids)) {
+ /* List if sorted by LRU, start from tail,
+ * stop when we see a recently used entry */
+ p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
+ if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
+ break;
+ list_del_init(&p->list);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ ib_destroy_cm_id(p->id);
+ ib_destroy_qp(p->qp);
+ kfree(p);
+ spin_lock_irqsave(&priv->lock, flags);
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+
+static ssize_t show_mode(struct device *d, struct device_attribute *attr,
+ char *buf)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
+
+ if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
+ return sprintf(buf, "connected\n");
+ else
+ return sprintf(buf, "datagram\n");
+}
+
+static ssize_t set_mode(struct device *d, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct net_device *dev = to_net_dev(d);
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ /* flush paths if we switch modes so that connections are restarted */
+ if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
+ set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+ ipoib_warn(priv, "enabling connected mode "
+ "will cause multicast packet drops\n");
+ ipoib_flush_paths(dev);
+ return count;
+ }
+
+ if (!strcmp(buf, "datagram\n")) {
+ clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+ dev->mtu = min(priv->mcast_mtu, dev->mtu);
+ ipoib_flush_paths(dev);
+ return count;
+ }
+
+ return -EINVAL;
+}
+
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
+
+int ipoib_cm_add_mode_attr(struct net_device *dev)
+{
+ return device_create_file(&dev->dev, &dev_attr_mode);
+}
+
+int ipoib_cm_dev_init(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_srq_init_attr srq_init_attr = {
+ .attr = {
+ .max_wr = ipoib_recvq_size,
+ .max_sge = IPOIB_CM_RX_SG
+ }
+ };
+ int ret, i;
+
+ INIT_LIST_HEAD(&priv->cm.passive_ids);
+ INIT_LIST_HEAD(&priv->cm.reap_list);
+ INIT_LIST_HEAD(&priv->cm.start_list);
+ INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
+ INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
+ INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
+ INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
+
+ skb_queue_head_init(&priv->cm.skb_queue);
+
+ priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
+ if (IS_ERR(priv->cm.srq)) {
+ ret = PTR_ERR(priv->cm.srq);
+ priv->cm.srq = NULL;
+ return ret;
+ }
+
+ priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
+ GFP_KERNEL);
+ if (!priv->cm.srq_ring) {
+ printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n",
+ priv->ca->name, ipoib_recvq_size);
+ ipoib_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+ priv->cm.rx_sge[i].lkey = priv->mr->lkey;
+
+ priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
+ for (i = 1; i < IPOIB_CM_RX_SG; ++i)
+ priv->cm.rx_sge[i].length = PAGE_SIZE;
+ priv->cm.rx_wr.next = NULL;
+ priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
+ priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
+
+ for (i = 0; i < ipoib_recvq_size; ++i) {
+ if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) {
+ ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+ if (ipoib_cm_post_receive(dev, i)) {
+ ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
+ ipoib_cm_dev_cleanup(dev);
+ return -EIO;
+ }
+ }
+
+ priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
+ return 0;
+}
+
+void ipoib_cm_dev_cleanup(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int i, ret;
+
+ if (!priv->cm.srq)
+ return;
+
+ ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
+
+ ret = ib_destroy_srq(priv->cm.srq);
+ if (ret)
+ ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);
+
+ priv->cm.srq = NULL;
+ if (!priv->cm.srq_ring)
+ return;
+ for (i = 0; i < ipoib_recvq_size; ++i)
+ if (priv->cm.srq_ring[i].skb) {
+ ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping);
+ dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
+ priv->cm.srq_ring[i].skb = NULL;
+ }
+ kfree(priv->cm.srq_ring);
+ priv->cm.srq_ring = NULL;
+}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index f1cb83688b31..44c174182a82 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -146,7 +146,7 @@ static int ipoib_mcg_open(struct inode *inode, struct file *file)
return 0;
}
-static struct file_operations ipoib_mcg_fops = {
+static const struct file_operations ipoib_mcg_fops = {
.owner = THIS_MODULE,
.open = ipoib_mcg_open,
.read = seq_read,
@@ -252,7 +252,7 @@ static int ipoib_path_open(struct inode *inode, struct file *file)
return 0;
}
-static struct file_operations ipoib_path_fops = {
+static const struct file_operations ipoib_path_fops = {
.owner = THIS_MODULE,
.open = ipoib_path_open,
.read = seq_read,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 59d9594ed6d9..f2aa923ddbea 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level,
"Enable data path debug tracing if > 0");
#endif
-#define IPOIB_OP_RECV (1ul << 31)
-
static DEFINE_MUTEX(pkey_mutex);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
@@ -268,10 +266,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->tx_lock, flags);
++priv->tx_tail;
- if (netif_queue_stopped(dev) &&
- test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
- priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
+ if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
+ priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
+ clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
netif_wake_queue(dev);
+ }
spin_unlock_irqrestore(&priv->tx_lock, flags);
if (wc->status != IB_WC_SUCCESS &&
@@ -283,7 +282,9 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
{
- if (wc->wr_id & IPOIB_OP_RECV)
+ if (wc->wr_id & IPOIB_CM_OP_SRQ)
+ ipoib_cm_handle_rx_wc(dev, wc);
+ else if (wc->wr_id & IPOIB_OP_RECV)
ipoib_ib_handle_rx_wc(dev, wc);
else
ipoib_ib_handle_tx_wc(dev, wc);
@@ -327,12 +328,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
u64 addr;
- if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) {
+ if (unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
- skb->len, dev->mtu + INFINIBAND_ALEN);
+ skb->len, priv->mcast_mtu + INFINIBAND_ALEN);
++priv->stats.tx_dropped;
++priv->stats.tx_errors;
- dev_kfree_skb_any(skb);
+ ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
return;
}
@@ -372,6 +373,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev);
+ set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
}
}
}
@@ -424,6 +426,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
return -1;
}
+ ret = ipoib_cm_dev_open(dev);
+ if (ret) {
+ ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
+ ipoib_ib_dev_stop(dev);
+ return -1;
+ }
+
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
@@ -509,6 +518,8 @@ int ipoib_ib_dev_stop(struct net_device *dev)
clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_cm_dev_stop(dev);
+
/*
* Move our QP to the error state and then reinitialize in
* when all work requests have completed or have been flushed.
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 705eb1d0e554..18d27fd352ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -49,8 +49,6 @@
#include <net/dst.h>
-#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
-
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");
@@ -145,6 +143,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
+ clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+
/*
* Now flush workqueue to make sure a scheduled task doesn't
* bring our internal state back up.
@@ -178,8 +178,18 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
+ /* dev->mtu > 2K ==> connected mode */
+ if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
+ if (new_mtu > priv->mcast_mtu)
+ ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
+ priv->mcast_mtu);
+ dev->mtu = new_mtu;
+ return 0;
+ }
+
+ if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
return -EINVAL;
+ }
priv->admin_mtu = new_mtu;
@@ -414,6 +424,20 @@ static void path_rec_completion(int status,
memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
sizeof(union ib_gid));
+ if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+ if (!ipoib_cm_get(neigh))
+ ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
+ path,
+ neigh));
+ if (!ipoib_cm_get(neigh)) {
+ list_del(&neigh->list);
+ if (neigh->ah)
+ ipoib_put_ah(neigh->ah);
+ ipoib_neigh_free(dev, neigh);
+ continue;
+ }
+ }
+
while ((skb = __skb_dequeue(&neigh->queue)))
__skb_queue_tail(&skqueue, skb);
}
@@ -520,7 +544,25 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
sizeof(union ib_gid));
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+ if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+ if (!ipoib_cm_get(neigh))
+ ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
+ if (!ipoib_cm_get(neigh)) {
+ list_del(&neigh->list);
+ if (neigh->ah)
+ ipoib_put_ah(neigh->ah);
+ ipoib_neigh_free(dev, neigh);
+ goto err_drop;
+ }
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+ __skb_queue_tail(&neigh->queue, skb);
+ else {
+ ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
+ skb_queue_len(&neigh->queue));
+ goto err_drop;
+ }
+ } else
+ ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
} else {
neigh->ah = NULL;
@@ -538,6 +580,7 @@ err_list:
err_path:
ipoib_neigh_free(dev, neigh);
+err_drop:
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
@@ -640,7 +683,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
neigh = *to_ipoib_neigh(skb->dst->neighbour);
- if (likely(neigh->ah)) {
+ if (ipoib_cm_get(neigh)) {
+ if (ipoib_cm_up(neigh)) {
+ ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+ goto out;
+ }
+ } else if (neigh->ah) {
if (unlikely(memcmp(&neigh->dgid.raw,
skb->dst->neighbour->ha + 4,
sizeof(union ib_gid)))) {
@@ -805,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
neigh->neighbour = neighbour;
*to_ipoib_neigh(neighbour) = neigh;
skb_queue_head_init(&neigh->queue);
+ ipoib_cm_set(neigh, NULL);
return neigh;
}
@@ -818,6 +867,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
++priv->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
+ if (ipoib_cm_get(neigh))
+ ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
kfree(neigh);
}
@@ -958,16 +1009,17 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
return netdev_priv(dev);
}
-static ssize_t show_pkey(struct class_device *cdev, char *buf)
+static ssize_t show_pkey(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct ipoib_dev_priv *priv =
- netdev_priv(container_of(cdev, struct net_device, class_dev));
+ struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
return sprintf(buf, "0x%04x\n", priv->pkey);
}
-static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
+static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
-static ssize_t create_child(struct class_device *cdev,
+static ssize_t create_child(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
int pkey;
@@ -985,14 +1037,14 @@ static ssize_t create_child(struct class_device *cdev,
*/
pkey |= 0x8000;
- ret = ipoib_vlan_add(container_of(cdev, struct net_device, class_dev),
- pkey);
+ ret = ipoib_vlan_add(to_net_dev(dev), pkey);
return ret ? ret : count;
}
-static CLASS_DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child);
+static DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child);
-static ssize_t delete_child(struct class_device *cdev,
+static ssize_t delete_child(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
int pkey;
@@ -1004,18 +1056,16 @@ static ssize_t delete_child(struct class_device *cdev,
if (pkey < 0 || pkey > 0xffff)
return -EINVAL;
- ret = ipoib_vlan_delete(container_of(cdev, struct net_device, class_dev),
- pkey);
+ ret = ipoib_vlan_delete(to_net_dev(dev), pkey);
return ret ? ret : count;
}
-static CLASS_DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child);
+static DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child);
int ipoib_add_pkey_attr(struct net_device *dev)
{
- return class_device_create_file(&dev->class_dev,
- &class_device_attr_pkey);
+ return device_create_file(&dev->dev, &dev_attr_pkey);
}
static struct net_device *ipoib_add_port(const char *format,
@@ -1081,13 +1131,13 @@ static struct net_device *ipoib_add_port(const char *format,
ipoib_create_debug_files(priv->dev);
+ if (ipoib_cm_add_mode_attr(priv->dev))
+ goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed;
- if (class_device_create_file(&priv->dev->class_dev,
- &class_device_attr_create_child))
+ if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
goto sysfs_failed;
- if (class_device_create_file(&priv->dev->class_dev,
- &class_device_attr_delete_child))
+ if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
goto sysfs_failed;
return priv->dev;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index b04b72ca32ed..b303ce6bc21e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex);
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
struct ipoib_mcast {
struct ib_sa_mcmember_rec mcmember;
+ struct ib_sa_multicast *mc;
struct ipoib_ah *ah;
struct rb_node rb_node;
struct list_head list;
- struct completion done;
-
- int query_id;
- struct ib_sa_query *query;
unsigned long created;
unsigned long backoff;
@@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
return 0;
}
-static void
+static int
ipoib_mcast_sendonly_join_complete(int status,
- struct ib_sa_mcmember_rec *mcmember,
- void *mcast_ptr)
+ struct ib_sa_multicast *multicast)
{
- struct ipoib_mcast *mcast = mcast_ptr;
+ struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ /* We trap for port events ourselves. */
+ if (status == -ENETRESET)
+ return 0;
+
if (!status)
- ipoib_mcast_join_finish(mcast, mcmember);
- else {
+ status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+ if (status) {
if (mcast->logcount++ < 20)
ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
IPOIB_GID_FMT ", status %d\n",
@@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int status,
spin_unlock_irq(&priv->tx_lock);
/* Clear the busy flag so we try again */
- clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
- mcast->query = NULL;
+ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
+ &mcast->flags);
}
-
- complete(&mcast->done);
+ return status;
}
static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
@@ -359,35 +359,33 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
rec.port_gid = priv->local_gid;
rec.pkey = cpu_to_be16(priv->pkey);
- init_completion(&mcast->done);
-
- ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
- IB_SA_MCMEMBER_REC_MGID |
- IB_SA_MCMEMBER_REC_PORT_GID |
- IB_SA_MCMEMBER_REC_PKEY |
- IB_SA_MCMEMBER_REC_JOIN_STATE,
- 1000, GFP_ATOMIC,
- ipoib_mcast_sendonly_join_complete,
- mcast, &mcast->query);
- if (ret < 0) {
- ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
+ mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
+ priv->port, &rec,
+ IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY |
+ IB_SA_MCMEMBER_REC_JOIN_STATE,
+ GFP_ATOMIC,
+ ipoib_mcast_sendonly_join_complete,
+ mcast);
+ if (IS_ERR(mcast->mc)) {
+ ret = PTR_ERR(mcast->mc);
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
ret);
} else {
ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
", starting join\n",
IPOIB_GID_ARG(mcast->mcmember.mgid));
-
- mcast->query_id = ret;
}
return ret;
}
-static void ipoib_mcast_join_complete(int status,
- struct ib_sa_mcmember_rec *mcmember,
- void *mcast_ptr)
+static int ipoib_mcast_join_complete(int status,
+ struct ib_sa_multicast *multicast)
{
- struct ipoib_mcast *mcast = mcast_ptr;
+ struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -395,24 +393,25 @@ static void ipoib_mcast_join_complete(int status,
" (status %d)\n",
IPOIB_GID_ARG(mcast->mcmember.mgid), status);
- if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
+ /* We trap for port events ourselves. */
+ if (status == -ENETRESET)
+ return 0;
+
+ if (!status)
+ status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+ if (!status) {
mcast->backoff = 1;
mutex_lock(&mcast_mutex);
if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
queue_delayed_work(ipoib_workqueue,
&priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
- complete(&mcast->done);
- return;
- }
-
- if (status == -EINTR) {
- complete(&mcast->done);
- return;
+ return 0;
}
- if (status && mcast->logcount++ < 20) {
- if (status == -ETIMEDOUT || status == -EINTR) {
+ if (mcast->logcount++ < 20) {
+ if (status == -ETIMEDOUT) {
ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
", status %d\n",
IPOIB_GID_ARG(mcast->mcmember.mgid),
@@ -429,24 +428,18 @@ static void ipoib_mcast_join_complete(int status,
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
- mutex_lock(&mcast_mutex);
+ /* Clear the busy flag so we try again */
+ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ mutex_lock(&mcast_mutex);
spin_lock_irq(&priv->lock);
- mcast->query = NULL;
-
- if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
- if (status == -ETIMEDOUT)
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
- 0);
- else
- queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
- mcast->backoff * HZ);
- } else
- complete(&mcast->done);
+ if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+ queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+ mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
- return;
+ return status;
}
static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
@@ -495,15 +488,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
}
- init_completion(&mcast->done);
-
- ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
- &rec, comp_mask, mcast->backoff * 1000,
- GFP_ATOMIC, ipoib_mcast_join_complete,
- mcast, &mcast->query);
-
- if (ret < 0) {
- ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
+ set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+ &rec, comp_mask, GFP_KERNEL,
+ ipoib_mcast_join_complete, mcast);
+ if (IS_ERR(mcast->mc)) {
+ clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ ret = PTR_ERR(mcast->mc);
+ ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
mcast->backoff *= 2;
if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
@@ -515,8 +507,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
&priv->mcast_task,
mcast->backoff * HZ);
mutex_unlock(&mcast_mutex);
- } else
- mcast->query_id = ret;
+ }
}
void ipoib_mcast_join_task(struct work_struct *work)
@@ -541,7 +532,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
priv->local_rate = attr.active_speed *
ib_width_enum_to_int(attr.active_width);
} else
- ipoib_warn(priv, "ib_query_port failed\n");
+ ipoib_warn(priv, "ib_query_port failed\n");
}
if (!priv->broadcast) {
@@ -568,7 +559,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
}
if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
- ipoib_mcast_join(dev, priv->broadcast, 0);
+ if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+ ipoib_mcast_join(dev, priv->broadcast, 0);
return;
}
@@ -597,7 +589,9 @@ void ipoib_mcast_join_task(struct work_struct *work)
priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
IPOIB_ENCAP_LEN;
- dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+
+ if (!ipoib_cm_admin_enabled(dev))
+ dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
@@ -623,26 +617,9 @@ int ipoib_mcast_start_thread(struct net_device *dev)
return 0;
}
-static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
- struct ipoib_mcast *mcast)
-{
- spin_lock_irq(&priv->lock);
- if (mcast && mcast->query) {
- ib_sa_cancel_query(mcast->query_id, mcast->query);
- mcast->query = NULL;
- spin_unlock_irq(&priv->lock);
- ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
- IPOIB_GID_ARG(mcast->mcmember.mgid));
- wait_for_completion(&mcast->done);
- }
- else
- spin_unlock_irq(&priv->lock);
-}
-
int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_mcast *mcast;
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
@@ -658,52 +635,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
if (flush)
flush_workqueue(ipoib_workqueue);
- wait_for_mcast_join(priv, priv->broadcast);
-
- list_for_each_entry(mcast, &priv->multicast_list, list)
- wait_for_mcast_join(priv, mcast);
-
return 0;
}
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ib_sa_mcmember_rec rec = {
- .join_state = 1
- };
int ret = 0;
- if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))
- return 0;
-
- ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
- IPOIB_GID_ARG(mcast->mcmember.mgid));
-
- rec.mgid = mcast->mcmember.mgid;
- rec.port_gid = priv->local_gid;
- rec.pkey = cpu_to_be16(priv->pkey);
+ if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+ ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
+ IPOIB_GID_ARG(mcast->mcmember.mgid));
- /* Remove ourselves from the multicast group */
- ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
- &mcast->mcmember.mgid);
- if (ret)
- ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+ /* Remove ourselves from the multicast group */
+ ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
+ &mcast->mcmember.mgid);
+ if (ret)
+ ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+ }
- /*
- * Just make one shot at leaving and don't wait for a reply;
- * if we fail, too bad.
- */
- ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
- IB_SA_MCMEMBER_REC_MGID |
- IB_SA_MCMEMBER_REC_PORT_GID |
- IB_SA_MCMEMBER_REC_PKEY |
- IB_SA_MCMEMBER_REC_JOIN_STATE,
- 0, GFP_ATOMIC, NULL,
- mcast, &mcast->query);
- if (ret < 0)
- ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
- "for leave (result = %d)\n", ret);
+ if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+ ib_sa_free_multicast(mcast->mc);
return 0;
}
@@ -756,7 +708,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
dev_kfree_skb_any(skb);
}
- if (mcast->query)
+ if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
ipoib_dbg_mcast(priv, "no address vector, "
"but multicast join already started\n");
else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -914,7 +866,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
/* We have to cancel outside of the spinlock */
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- wait_for_mcast_join(priv, mcast);
ipoib_mcast_leave(mcast->dev, mcast);
ipoib_mcast_free(mcast);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 7b717c648f72..3cb551b88756 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
.qp_type = IB_QPT_UD
};
+ int ret, size;
+
priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
return -ENODEV;
}
- priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
- ipoib_sendq_size + ipoib_recvq_size + 1);
+ priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(priv->mr)) {
+ printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
+ goto out_free_pd;
+ }
+
+ size = ipoib_sendq_size + ipoib_recvq_size + 1;
+ ret = ipoib_cm_dev_init(dev);
+ if (!ret)
+ size += ipoib_recvq_size;
+
+ priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size);
if (IS_ERR(priv->cq)) {
printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
- goto out_free_pd;
+ goto out_free_mr;
}
if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
goto out_free_cq;
- priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(priv->mr)) {
- printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
- goto out_free_cq;
- }
-
init_attr.send_cq = priv->cq;
init_attr.recv_cq = priv->cq,
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
- goto out_free_mr;
+ goto out_free_cq;
}
priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
return 0;
-out_free_mr:
- ib_dereg_mr(priv->mr);
-
out_free_cq:
ib_destroy_cq(priv->cq);
+out_free_mr:
+ ib_dereg_mr(priv->mr);
+
out_free_pd:
ib_dealloc_pd(priv->pd);
return -ENODEV;
@@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
- if (ib_dereg_mr(priv->mr))
- ipoib_warn(priv, "ib_dereg_mr failed\n");
-
if (ib_destroy_cq(priv->cq))
ipoib_warn(priv, "ib_cq_destroy failed\n");
+ ipoib_cm_dev_cleanup(dev);
+
+ if (ib_dereg_mr(priv->mr))
+ ipoib_warn(priv, "ib_dereg_mr failed\n");
+
if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n");
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index f887780e8093..6762988439d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -42,15 +42,15 @@
#include "ipoib.h"
-static ssize_t show_parent(struct class_device *class_dev, char *buf)
+static ssize_t show_parent(struct device *d, struct device_attribute *attr,
+ char *buf)
{
- struct net_device *dev =
- container_of(class_dev, struct net_device, class_dev);
+ struct net_device *dev = to_net_dev(d);
struct ipoib_dev_priv *priv = netdev_priv(dev);
return sprintf(buf, "%s\n", priv->parent->name);
}
-static CLASS_DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
+static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
{
@@ -115,11 +115,12 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
ipoib_create_debug_files(priv->dev);
+ if (ipoib_cm_add_mode_attr(priv->dev))
+ goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed;
- if (class_device_create_file(&priv->dev->class_dev,
- &class_device_attr_parent))
+ if (device_create_file(&priv->dev->dev, &dev_attr_parent))
goto sysfs_failed;
list_add_tail(&priv->list, &ppriv->child_intfs);
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 0a7d1ab60e6d..89e37283c836 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -567,7 +567,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
opcode = hdr->opcode & ISCSI_OPCODE_MASK;
if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
- itt = hdr->itt & ISCSI_ITT_MASK; /* mask out cid and age bits */
+ itt = get_itt(hdr->itt); /* mask out cid and age bits */
if (!(itt < session->cmds_max))
iser_err("itt can't be matched to task!!!"
"conn %p opcode %d cmds_max %d itt %d\n",
@@ -625,7 +625,7 @@ void iser_snd_completion(struct iser_desc *tx_desc)
/* this arithmetic is legal by libiscsi dd_data allocation */
mtask = (void *) ((long)(void *)tx_desc -
sizeof(struct iscsi_mgmt_task));
- if (mtask->hdr->itt == cpu_to_be32(ISCSI_RESERVED_TAG)) {
+ if (mtask->hdr->itt == RESERVED_ITT) {
struct iscsi_session *session = conn->session;
spin_lock(&conn->session->lock);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 72611fd15103..5e8ac577f0ad 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -548,6 +548,7 @@ static int srp_reconnect_target(struct srp_target_port *target)
target->tx_head = 0;
target->tx_tail = 0;
+ target->qp_in_error = 0;
ret = srp_connect_target(target);
if (ret)
goto err;
@@ -878,6 +879,7 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr)
printk(KERN_ERR PFX "failed %s status %d\n",
wc.wr_id & SRP_OP_RECV ? "receive" : "send",
wc.status);
+ target->qp_in_error = 1;
break;
}
@@ -1337,6 +1339,8 @@ static int srp_abort(struct scsi_cmnd *scmnd)
printk(KERN_ERR "SRP abort called\n");
+ if (target->qp_in_error)
+ return FAILED;
if (srp_find_req(target, scmnd, &req))
return FAILED;
if (srp_send_tsk_mgmt(target, req, SRP_TSK_ABORT_TASK))
@@ -1365,6 +1369,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
printk(KERN_ERR "SRP reset_device called\n");
+ if (target->qp_in_error)
+ return FAILED;
if (srp_find_req(target, scmnd, &req))
return FAILED;
if (srp_send_tsk_mgmt(target, req, SRP_TSK_LUN_RESET))
@@ -1801,6 +1807,7 @@ static ssize_t srp_create_target(struct class_device *class_dev,
goto err_free;
}
+ target->qp_in_error = 0;
ret = srp_connect_target(target);
if (ret) {
printk(KERN_ERR PFX "Connection failed\n");
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index c21772317b86..2f3319c719a5 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -158,6 +158,7 @@ struct srp_target_port {
struct completion done;
int status;
enum srp_target_state state;
+ int qp_in_error;
};
struct srp_iu {