aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/health.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 09:41:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 09:41:05 -0800
commitb0f85fa11aefc4f3e03306b4cd47f113bd57dcba (patch)
tree1333d36d99fde3f97210795941fc246f0ad08a75 /drivers/net/ethernet/mellanox/mlx5/core/health.c
parentccc9d4a6d640cbde05d519edeb727881646cf71b (diff)
parentf32bfb9a8ca083f8d148ea90ae5ba66f4831836e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: Changes of note: 1) Allow to schedule ICMP packets in IPVS, from Alex Gartrell. 2) Provide FIB table ID in ipv4 route dumps just as ipv6 does, from David Ahern. 3) Allow the user to ask for the statistics to be filtered out of ipv4/ipv6 address netlink dumps. From Sowmini Varadhan. 4) More work to pass the network namespace context around deep into various packet path APIs, starting with the netfilter hooks. From Eric W Biederman. 5) Add layer 2 TX/RX checksum offloading to qeth driver, from Thomas Richter. 6) Use usec resolution for SYN/ACK RTTs in TCP, from Yuchung Cheng. 7) Support Very High Throughput in wireless MESH code, from Bob Copeland. 8) Allow setting the ageing_time in switchdev/rocker. From Scott Feldman. 9) Properly autoload L2TP type modules, from Stephen Hemminger. 10) Fix and enable offload features by default in 8139cp driver, from David Woodhouse. 11) Support both ipv4 and ipv6 sockets in a single vxlan device, from Jiri Benc. 12) Fix CWND limiting of thin streams in TCP, from Bendik Rønning Opstad. 13) Fix IPSEC flowcache overflows on large systems, from Steffen Klassert. 14) Convert bridging to track VLANs using rhashtable entries rather than a bitmap. From Nikolay Aleksandrov. 15) Make TCP listener handling completely lockless, this is a major accomplishment. Incoming request sockets now live in the established hash table just like any other socket too. From Eric Dumazet. 15) Provide more bridging attributes to netlink, from Nikolay Aleksandrov. 16) Use hash based algorithm for ipv4 multipath routing, this was very long overdue. From Peter Nørlund. 17) Several y2038 cures, mostly avoiding timespec. From Arnd Bergmann. 18) Allow non-root execution of EBPF programs, from Alexei Starovoitov. 19) Support SO_INCOMING_CPU as setsockopt, from Eric Dumazet. This influences the port binding selection logic used by SO_REUSEPORT. 20) Add ipv6 support to VRF, from David Ahern. 21) Add support for Mellanox Spectrum switch ASIC, from Jiri Pirko. 22) Add rtl8xxxu Realtek wireless driver, from Jes Sorensen. 23) Implement RACK loss recovery in TCP, from Yuchung Cheng. 24) Support multipath routes in MPLS, from Roopa Prabhu. 25) Fix POLLOUT notification for listening sockets in AF_UNIX, from Eric Dumazet. 26) Add new QED Qlogic river, from Yuval Mintz, Manish Chopra, and Sudarsana Kalluru. 27) Don't fetch timestamps on AF_UNIX sockets, from Hannes Frederic Sowa. 28) Support ipv6 geneve tunnels, from John W Linville. 29) Add flood control support to switchdev layer, from Ido Schimmel. 30) Fix CHECKSUM_PARTIAL handling of potentially fragmented frames, from Hannes Frederic Sowa. 31) Support persistent maps and progs in bpf, from Daniel Borkmann. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1790 commits) sh_eth: use DMA barriers switchdev: respect SKIP_EOPNOTSUPP flag in case there is no recursion net: sched: kill dead code in sch_choke.c irda: Delete an unnecessary check before the function call "irlmp_unregister_service" net: dsa: mv88e6xxx: include DSA ports in VLANs net: dsa: mv88e6xxx: disable SA learning for DSA and CPU ports net/core: fix for_each_netdev_feature vlan: Invoke driver vlan hooks only if device is present arcnet/com20020: add LEDS_CLASS dependency bpf, verifier: annotate verbose printer with __printf dp83640: Only wait for timestamps for packets with timestamping enabled. ptp: Change ptp_class to a proper bitmask dp83640: Prune rx timestamp list before reading from it dp83640: Delay scheduled work. dp83640: Include hash in timestamp/packet matching ipv6: fix tunnel error handling net/mlx5e: Fix LSO vlan insertion net/mlx5e: Re-eanble client vlan TX acceleration net/mlx5e: Return error in case mlx5e_set_features() fails net/mlx5e: Don't allow more than max supported channels ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/health.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c224
1 files changed, 173 insertions, 51 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 292d76f2a904..f5deb642d0d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -34,6 +34,7 @@
#include <linux/module.h>
#include <linux/random.h>
#include <linux/vmalloc.h>
+#include <linux/hardirq.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
@@ -46,39 +47,113 @@ enum {
enum {
MLX5_HEALTH_SYNDR_FW_ERR = 0x1,
MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7,
+ MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8,
MLX5_HEALTH_SYNDR_CRC_ERR = 0x9,
MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa,
MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb,
MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc,
MLX5_HEALTH_SYNDR_EQ_ERR = 0xd,
+ MLX5_HEALTH_SYNDR_EQ_INV = 0xe,
MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf,
+ MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
};
-static DEFINE_SPINLOCK(health_lock);
-static LIST_HEAD(health_list);
-static struct work_struct health_work;
+enum {
+ MLX5_NIC_IFC_FULL = 0,
+ MLX5_NIC_IFC_DISABLED = 1,
+ MLX5_NIC_IFC_NO_DRAM_NIC = 2
+};
-static void health_care(struct work_struct *work)
+static u8 get_nic_interface(struct mlx5_core_dev *dev)
{
- struct mlx5_core_health *health, *n;
- struct mlx5_core_dev *dev;
- struct mlx5_priv *priv;
- LIST_HEAD(tlist);
+ return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+}
+
+static void trigger_cmd_completions(struct mlx5_core_dev *dev)
+{
+ unsigned long flags;
+ u64 vector;
- spin_lock_irq(&health_lock);
- list_splice_init(&health_list, &tlist);
+ /* wait for pending handlers to complete */
+ synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector);
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+static int in_fatal(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
- spin_unlock_irq(&health_lock);
+ if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED)
+ return 1;
- list_for_each_entry_safe(health, n, &tlist, list) {
- priv = container_of(health, struct mlx5_priv, health);
- dev = container_of(priv, struct mlx5_core_dev, priv);
- mlx5_core_warn(dev, "handling bad device here\n");
- /* nothing yet */
- spin_lock_irq(&health_lock);
- list_del_init(&health->list);
- spin_unlock_irq(&health_lock);
+ if (ioread32be(&h->fw_ver) == 0xffffffff)
+ return 1;
+
+ return 0;
+}
+
+void mlx5_enter_error_state(struct mlx5_core_dev *dev)
+{
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return;
+
+ mlx5_core_err(dev, "start\n");
+ if (pci_channel_offline(dev->pdev) || in_fatal(dev))
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+
+ mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
+ mlx5_core_err(dev, "end\n");
+}
+
+static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
+{
+ u8 nic_interface = get_nic_interface(dev);
+
+ switch (nic_interface) {
+ case MLX5_NIC_IFC_FULL:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
+ break;
+
+ case MLX5_NIC_IFC_DISABLED:
+ mlx5_core_warn(dev, "starting teardown\n");
+ break;
+
+ case MLX5_NIC_IFC_NO_DRAM_NIC:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
+ break;
+ default:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
+ nic_interface);
}
+
+ mlx5_disable_device(dev);
+}
+
+static void health_care(struct work_struct *work)
+{
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+
+ health = container_of(work, struct mlx5_core_health, work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ mlx5_core_warn(dev, "handling bad device here\n");
+ mlx5_handle_bad_state(dev);
}
static const char *hsynd_str(u8 synd)
@@ -88,6 +163,8 @@ static const char *hsynd_str(u8 synd)
return "firmware internal error";
case MLX5_HEALTH_SYNDR_IRISC_ERR:
return "irisc not responding";
+ case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
+ return "unrecoverable hardware error";
case MLX5_HEALTH_SYNDR_CRC_ERR:
return "firmware CRC error";
case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
@@ -98,48 +175,81 @@ static const char *hsynd_str(u8 synd)
return "async EQ buffer overrun";
case MLX5_HEALTH_SYNDR_EQ_ERR:
return "EQ error";
+ case MLX5_HEALTH_SYNDR_EQ_INV:
+ return "Invalid EQ refrenced";
case MLX5_HEALTH_SYNDR_FFSER_ERR:
return "FFSER error";
+ case MLX5_HEALTH_SYNDR_HIGH_TEMP:
+ return "High temprature";
default:
return "unrecognized error";
}
}
-static u16 read_be16(__be16 __iomem *p)
+static u16 get_maj(u32 fw)
{
- return swab16(readl((__force u16 __iomem *) p));
+ return fw >> 28;
}
-static u32 read_be32(__be32 __iomem *p)
+static u16 get_min(u32 fw)
{
- return swab32(readl((__force u32 __iomem *) p));
+ return fw >> 16 & 0xfff;
+}
+
+static u16 get_sub(u32 fw)
+{
+ return fw & 0xffff;
}
static void print_health_info(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
struct health_buffer __iomem *h = health->health;
+ char fw_str[18];
+ u32 fw;
int i;
+ /* If the syndrom is 0, the device is OK and no need to print buffer */
+ if (!ioread8(&h->synd))
+ return;
+
for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
- pr_info("assert_var[%d] 0x%08x\n", i, read_be32(h->assert_var + i));
+ dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
+
+ dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
+ dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+ fw = ioread32be(&h->fw_ver);
+ sprintf(fw_str, "%d.%d.%d", get_maj(fw), get_min(fw), get_sub(fw));
+ dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str);
+ dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+ dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index));
+ dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
+ dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+}
+
+static unsigned long get_next_poll_jiffies(void)
+{
+ unsigned long next;
- pr_info("assert_exit_ptr 0x%08x\n", read_be32(&h->assert_exit_ptr));
- pr_info("assert_callra 0x%08x\n", read_be32(&h->assert_callra));
- pr_info("fw_ver 0x%08x\n", read_be32(&h->fw_ver));
- pr_info("hw_id 0x%08x\n", read_be32(&h->hw_id));
- pr_info("irisc_index %d\n", readb(&h->irisc_index));
- pr_info("synd 0x%x: %s\n", readb(&h->synd), hsynd_str(readb(&h->synd)));
- pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_sync));
+ get_random_bytes(&next, sizeof(next));
+ next %= HZ;
+ next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+
+ return next;
}
static void poll_health(unsigned long data)
{
struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
struct mlx5_core_health *health = &dev->priv.health;
- unsigned long next;
u32 count;
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ trigger_cmd_completions(dev);
+ mod_timer(&health->timer, get_next_poll_jiffies());
+ return;
+ }
+
count = ioread32be(health->health_counter);
if (count == health->prev)
++health->miss_counter;
@@ -148,18 +258,16 @@ static void poll_health(unsigned long data)
health->prev = count;
if (health->miss_counter == MAX_MISSES) {
- mlx5_core_err(dev, "device's health compromised\n");
+ dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
print_health_info(dev);
- spin_lock_irq(&health_lock);
- list_add_tail(&health->list, &health_list);
- spin_unlock_irq(&health_lock);
-
- queue_work(mlx5_core_wq, &health_work);
} else {
- get_random_bytes(&next, sizeof(next));
- next %= HZ;
- next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
- mod_timer(&health->timer, next);
+ mod_timer(&health->timer, get_next_poll_jiffies());
+ }
+
+ if (in_fatal(dev) && !health->sick) {
+ health->sick = true;
+ print_health_info(dev);
+ queue_work(health->wq, &health->work);
}
}
@@ -167,7 +275,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
- INIT_LIST_HEAD(&health->list);
init_timer(&health->timer);
health->health = &dev->iseg->health;
health->health_counter = &dev->iseg->health_counter;
@@ -183,18 +290,33 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
struct mlx5_core_health *health = &dev->priv.health;
del_timer_sync(&health->timer);
-
- spin_lock_irq(&health_lock);
- if (!list_empty(&health->list))
- list_del_init(&health->list);
- spin_unlock_irq(&health_lock);
}
-void mlx5_health_cleanup(void)
+void mlx5_health_cleanup(struct mlx5_core_dev *dev)
{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ destroy_workqueue(health->wq);
}
-void __init mlx5_health_init(void)
+int mlx5_health_init(struct mlx5_core_dev *dev)
{
- INIT_WORK(&health_work, health_care);
+ struct mlx5_core_health *health;
+ char *name;
+
+ health = &dev->priv.health;
+ name = kmalloc(64, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ strcpy(name, "mlx5_health");
+ strcat(name, dev_name(&dev->pdev->dev));
+ health->wq = create_singlethread_workqueue(name);
+ kfree(name);
+ if (!health->wq)
+ return -ENOMEM;
+
+ INIT_WORK(&health->work, health_care);
+
+ return 0;
}