From 4196670be786d529ab7f6c18f5077141ce1b787e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 2 Feb 2014 17:06:47 +0200 Subject: IB/mlx4: Don't allocate range of steerable UD QPs for Ethernet-only device When the device has only Ethernet ports, don't try to allocate range of steerable UD QPs since they aren't needed. This fixes an issue where mlx4 VFs tried to allocate a range of UD steerable QPs, but failed to do so. Fixes: c1c98501121e ("IB/mlx4: Add support for steerable IB UD QPs") Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c2702f549f10..64ca4087fb52 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1810,6 +1810,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) int i, j; int err; struct mlx4_ib_iboe *iboe; + int ib_num_ports = 0; pr_info_once("%s", mlx4_ib_version); @@ -1985,10 +1986,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->counters[i] = -1; } + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ib_num_ports++; + spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED && + ib_num_ports) { ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, -- cgit v1.2.3 From acc4fccf4eff5b29e545995b75de77e60ea44aae Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:12:58 +0200 Subject: IB/mlx4: Make sure GID index 0 is always occupied Make sure that for Ethernet ports, the port GID table index 0 is always occupied with a default GID of the relevant IPv6 link-local adderss. This provides better user experience for legacy applications that don't use the RDMA CM and were working on index 0 prior to the IP addressing change. Also, as GIDs are generated from IP addresses of the network devices that are associated with the port, it's basically possible that the GID table will be empty if no IP address was assigned. This doesn't comply with the IB spec section 4.1.1 "GID usage and properties". Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 92 +++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 64ca4087fb52..b99d0ecc6a89 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1357,6 +1357,21 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, + struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + static void update_gids_task(struct work_struct *work) { struct update_gid_work *gw = container_of(work, struct update_gid_work, work); @@ -1425,7 +1440,8 @@ free: } static int update_gid_table(struct mlx4_ib_dev *dev, int port, - union ib_gid *gid, int clear) + union ib_gid *gid, int clear, + int default_gid) { struct update_gid_work *work; int i; @@ -1434,26 +1450,31 @@ static int update_gid_table(struct mlx4_ib_dev *dev, int port, int found = -1; int max_gids; - max_gids = dev->dev->caps.gid_table_len[port]; - for (i = 0; i < max_gids; ++i) { - if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, - sizeof(*gid))) - found = i; - - if (clear) { - if (found >= 0) { - need_update = 1; - dev->iboe.gid_table[port - 1][found] = zgid; - break; - } - } else { - if (found >= 0) - break; - - if (free < 0 && - !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, + if (default_gid) { + free = 0; + } else { + max_gids = dev->dev->caps.gid_table_len[port]; + for (i = 1; i < max_gids; ++i) { + if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, sizeof(*gid))) - free = i; + found = i; + + if (clear) { + if (found >= 0) { + need_update = 1; + dev->iboe.gid_table[port - 1][found] = + zgid; + break; + } + } else { + if (found >= 0) + break; + + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], + &zgid, sizeof(*gid))) + free = i; + } } } @@ -1478,6 +1499,13 @@ static int update_gid_table(struct mlx4_ib_dev *dev, int port, return 0; } +static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid) +{ + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); +} + + static int reset_gid_table(struct mlx4_ib_dev *dev) { struct update_gid_work *work; @@ -1502,6 +1530,12 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? rdma_vlan_dev_real_dev(event_netdev) : event_netdev; + union ib_gid default_gid; + + mlx4_make_default_gid(real_dev, &default_gid); + + if (!memcmp(gid, &default_gid, sizeof(*gid))) + return 0; if (event != NETDEV_DOWN && event != NETDEV_UP) return 0; @@ -1520,7 +1554,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1]))) update_gid_table(ibdev, port, gid, - event == NETDEV_DOWN); + event == NETDEV_DOWN, 0); spin_unlock(&iboe->lock); return 0; @@ -1607,7 +1641,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, /*ifa->ifa_address;*/ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); - update_gid_table(ibdev, port, &gid, 0); + update_gid_table(ibdev, port, &gid, 0, 0); } endfor_ifa(in_dev); in_dev_put(in_dev); @@ -1619,7 +1653,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { pgid = (union ib_gid *)&ifp->addr; - update_gid_table(ibdev, port, pgid, 0); + update_gid_table(ibdev, port, pgid, 0, 0); } read_unlock_bh(&in6_dev->lock); in6_dev_put(in6_dev); @@ -1627,6 +1661,14 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, #endif } +static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev, + struct net_device *dev, u8 port) +{ + union ib_gid gid; + mlx4_make_default_gid(dev, &gid); + update_gid_table(ibdev, port, &gid, 0, 1); +} + static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { struct net_device *dev; @@ -1660,7 +1702,9 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) struct net_device *curr_master; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); - + if (iboe->netdevs[port - 1]) + mlx4_ib_set_default_gid(ibdev, + iboe->netdevs[port - 1], port); if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) { rtnl_lock(); -- cgit v1.2.3 From 4ce5a5744a2f5479e58c6788cbe3987b8071b62e Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:12:59 +0200 Subject: IB/mlx4: Move rtnl locking to the right place On the one hand, the invocation of netdev_master_upper_dev_get() within mlx4_ib_scan_netdevs() must be done with rtnl lock held. On the other hand, it's wrong to call rtnl_lock() from within this function since it's also called by our netdev notifier callback. Therefore move the locking to mlx4_ib_add() so that both cases are covered. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b99d0ecc6a89..93f492f62997 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1707,10 +1707,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) iboe->netdevs[port - 1], port); if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) { - rtnl_lock(); iboe->masters[port - 1] = netdev_master_upper_dev_get( iboe->netdevs[port - 1]); - rtnl_unlock(); } curr_master = iboe->masters[port - 1]; @@ -2100,7 +2098,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } } #endif + rtnl_lock(); mlx4_ib_scan_netdevs(ibdev); + rtnl_unlock(); mlx4_ib_init_gid_table(ibdev); } -- cgit v1.2.3 From ddf8bd349115c2bc85a62e3d94018c9976ac72f7 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:13:00 +0200 Subject: IB/mlx4: Do IBoE locking earlier when initializing the GID table Updating the GID table under IBoE requires read/write from/to shared data structures. These data structures are protected with the device iboe lock. The flows that modify the GID table start from 1. Initializing the GID table 2. NETDEV events 3. INET or INET6 events This patch makes sure that the flow of initializing the GID table is consistent with the other two flows w.r.t on what step the lock is taken. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 93f492f62997..3bafd0bebd4c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1570,7 +1570,6 @@ static u8 mlx4_ib_get_dev_port(struct net_device *dev, rdma_vlan_dev_real_dev(dev) : dev; iboe = &ibdev->iboe; - spin_lock(&iboe->lock); for (port = 1; port <= MLX4_MAX_PORTS; ++port) if ((netif_is_bond_master(real_dev) && @@ -1579,8 +1578,6 @@ static u8 mlx4_ib_get_dev_port(struct net_device *dev, (real_dev == iboe->netdevs[port - 1]))) break; - spin_unlock(&iboe->lock); - if ((port == 0) || (port > MLX4_MAX_PORTS)) return 0; else @@ -1672,11 +1669,13 @@ static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev, static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { struct net_device *dev; + struct mlx4_ib_iboe *iboe = &ibdev->iboe; if (reset_gid_table(ibdev)) return -1; read_lock(&dev_base_lock); + spin_lock(&iboe->lock); for_each_netdev(&init_net, dev) { u8 port = mlx4_ib_get_dev_port(dev, ibdev); @@ -1684,6 +1683,7 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) mlx4_ib_get_dev_addr(dev, ibdev, port); } + spin_unlock(&iboe->lock); read_unlock(&dev_base_lock); return 0; -- cgit v1.2.3 From 5071456fe244e409adcda7226e5f0b5d1b879fd3 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:13:01 +0200 Subject: IB/mlx4: Do IBoE GID table resets per-port The IBoE code used to reset the GID table did it for all Ethernet ports of the device. Since the whole architecture of generating GIDs and responding to events is port-based, this is inefficient and can lead to wrong content in the GID table. Change the reset flow to be per-port. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3bafd0bebd4c..8e10630561c1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1408,7 +1408,6 @@ static void reset_gids_task(struct work_struct *work) struct mlx4_cmd_mailbox *mailbox; union ib_gid *gids; int err; - int i; struct mlx4_dev *dev = gw->dev->dev; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -1420,18 +1419,16 @@ static void reset_gids_task(struct work_struct *work) gids = mailbox->buf; memcpy(gids, gw->gids, sizeof(gw->gids)); - for (i = 1; i < gw->dev->num_ports + 1; i++) { - if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, i) == - IB_LINK_LAYER_ETHERNET) { - err = mlx4_cmd(dev, mailbox->dma, - MLX4_SET_PORT_GID_TABLE << 8 | i, - 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_WRAPPED); - if (err) - pr_warn(KERN_WARNING - "set port %d command failed\n", i); - } + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) == + IB_LINK_LAYER_ETHERNET) { + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + pr_warn(KERN_WARNING + "set port %d command failed\n", gw->port); } mlx4_free_cmd_mailbox(dev, mailbox); @@ -1506,7 +1503,7 @@ static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid) } -static int reset_gid_table(struct mlx4_ib_dev *dev) +static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port) { struct update_gid_work *work; @@ -1514,10 +1511,12 @@ static int reset_gid_table(struct mlx4_ib_dev *dev) work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return -ENOMEM; - memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table)); + + memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids)); memset(work->gids, 0, sizeof(work->gids)); INIT_WORK(&work->work, reset_gids_task); work->dev = dev; + work->port = port; queue_work(wq, &work->work); return 0; } @@ -1670,9 +1669,11 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { struct net_device *dev; struct mlx4_ib_iboe *iboe = &ibdev->iboe; + int i; - if (reset_gid_table(ibdev)) - return -1; + for (i = 1; i <= ibdev->num_ports; ++i) + if (reset_gid_table(ibdev, i)) + return -1; read_lock(&dev_base_lock); spin_lock(&iboe->lock); -- cgit v1.2.3 From ad4885d279b63c65347220236d07669a2f59634b Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:13:02 +0200 Subject: IB/mlx4: Build the port IBoE GID table properly under bonding When scanning netdevices we need to check a few more conditions and cases to build the IBoE GID table properly. For example, under bonding we must make sure that when a port is down, the bond IP address isn't programmed as a GID, since doing so will cause failure with IB core flows that selects ports by GID. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 41 +++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8e10630561c1..fdb5c7c9e127 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1507,7 +1507,6 @@ static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port) { struct update_gid_work *work; - work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return -ENOMEM; @@ -1699,25 +1698,57 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) spin_lock(&iboe->lock); mlx4_foreach_ib_transport_port(port, ibdev->dev) { + enum ib_port_state port_state = IB_PORT_NOP; struct net_device *old_master = iboe->masters[port - 1]; + struct net_device *curr_netdev; struct net_device *curr_master; + iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); if (iboe->netdevs[port - 1]) mlx4_ib_set_default_gid(ibdev, iboe->netdevs[port - 1], port); + curr_netdev = iboe->netdevs[port - 1]; + if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) { iboe->masters[port - 1] = netdev_master_upper_dev_get( iboe->netdevs[port - 1]); + } else { + iboe->masters[port - 1] = NULL; } curr_master = iboe->masters[port - 1]; + if (curr_netdev) { + port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + } else { + reset_gid_table(ibdev, port); + } + /* if using bonding/team and a slave port is down, we don't the bond IP + * based gids in the table since flows that select port by gid may get + * the down port. + */ + if (curr_master && (port_state == IB_PORT_DOWN)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + } /* if bonding is used it is possible that we add it to masters - only after IP address is assigned to the net bonding - interface */ - if (curr_master && (old_master != curr_master)) + * only after IP address is assigned to the net bonding + * interface. + */ + if (curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); mlx4_ib_get_dev_addr(curr_master, ibdev, port); + } + + if (!curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + mlx4_ib_get_dev_addr(curr_netdev, ibdev, port); + } } spin_unlock(&iboe->lock); @@ -2099,6 +2130,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } } #endif + for (i = 1 ; i <= ibdev->num_ports ; ++i) + reset_gid_table(ibdev, i); rtnl_lock(); mlx4_ib_scan_netdevs(ibdev); rtnl_unlock(); -- cgit v1.2.3