aboutsummaryrefslogtreecommitdiff
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/Kconfig2
-rw-r--r--drivers/misc/ad525x_dpot-i2c.c1
-rw-r--r--drivers/misc/genwqe/card_base.h2
-rw-r--r--drivers/misc/genwqe/card_ddcb.c2
-rw-r--r--drivers/misc/genwqe/card_dev.c4
-rw-r--r--drivers/misc/genwqe/card_utils.c5
-rw-r--r--drivers/misc/hpilo.c6
-rw-r--r--drivers/misc/kgdbts.c10
-rw-r--r--drivers/misc/lkdtm.c8
-rw-r--r--drivers/misc/mei/amthif.c2
-rw-r--r--drivers/misc/mei/bus-fixup.c4
-rw-r--r--drivers/misc/mei/bus.c218
-rw-r--r--drivers/misc/mei/client.h12
-rw-r--r--drivers/misc/mei/debugfs.c2
-rw-r--r--drivers/misc/mei/hbm.c6
-rw-r--r--drivers/misc/mei/hw-me.c4
-rw-r--r--drivers/misc/mei/init.c4
-rw-r--r--drivers/misc/mei/interrupt.c4
-rw-r--r--drivers/misc/mei/mei_dev.h19
-rw-r--r--drivers/misc/mic/Kconfig25
-rw-r--r--drivers/misc/mic/Makefile2
-rw-r--r--drivers/misc/mic/bus/Makefile1
-rw-r--r--drivers/misc/mic/bus/cosm_bus.c141
-rw-r--r--drivers/misc/mic/bus/cosm_bus.h134
-rw-r--r--drivers/misc/mic/bus/mic_bus.c24
-rw-r--r--drivers/misc/mic/bus/scif_bus.c9
-rw-r--r--drivers/misc/mic/bus/scif_bus.h6
-rw-r--r--drivers/misc/mic/card/mic_device.c88
-rw-r--r--drivers/misc/mic/card/mic_x100.c2
-rw-r--r--drivers/misc/mic/common/mic_dev.h13
-rw-r--r--drivers/misc/mic/cosm/Makefile10
-rw-r--r--drivers/misc/mic/cosm/cosm_debugfs.c156
-rw-r--r--drivers/misc/mic/cosm/cosm_main.c388
-rw-r--r--drivers/misc/mic/cosm/cosm_main.h70
-rw-r--r--drivers/misc/mic/cosm/cosm_scif_server.c405
-rw-r--r--drivers/misc/mic/cosm/cosm_sysfs.c461
-rw-r--r--drivers/misc/mic/cosm_client/Makefile7
-rw-r--r--drivers/misc/mic/cosm_client/cosm_scif_client.c275
-rw-r--r--drivers/misc/mic/host/Makefile1
-rw-r--r--drivers/misc/mic/host/mic_boot.c317
-rw-r--r--drivers/misc/mic/host/mic_debugfs.c114
-rw-r--r--drivers/misc/mic/host/mic_device.h88
-rw-r--r--drivers/misc/mic/host/mic_fops.c4
-rw-r--r--drivers/misc/mic/host/mic_intr.c46
-rw-r--r--drivers/misc/mic/host/mic_main.c223
-rw-r--r--drivers/misc/mic/host/mic_smpt.c30
-rw-r--r--drivers/misc/mic/host/mic_sysfs.c459
-rw-r--r--drivers/misc/mic/host/mic_virtio.c17
-rw-r--r--drivers/misc/mic/host/mic_virtio.h2
-rw-r--r--drivers/misc/mic/host/mic_x100.c46
-rw-r--r--drivers/misc/mic/scif/Makefile5
-rw-r--r--drivers/misc/mic/scif/scif_api.c234
-rw-r--r--drivers/misc/mic/scif/scif_debugfs.c85
-rw-r--r--drivers/misc/mic/scif/scif_dma.c1979
-rw-r--r--drivers/misc/mic/scif/scif_epd.c26
-rw-r--r--drivers/misc/mic/scif/scif_epd.h50
-rw-r--r--drivers/misc/mic/scif/scif_fd.c178
-rw-r--r--drivers/misc/mic/scif/scif_fence.c771
-rw-r--r--drivers/misc/mic/scif/scif_main.c111
-rw-r--r--drivers/misc/mic/scif/scif_main.h37
-rw-r--r--drivers/misc/mic/scif/scif_map.h25
-rw-r--r--drivers/misc/mic/scif/scif_mmap.c699
-rw-r--r--drivers/misc/mic/scif/scif_nm.c20
-rw-r--r--drivers/misc/mic/scif/scif_nodeqp.c149
-rw-r--r--drivers/misc/mic/scif/scif_nodeqp.h42
-rw-r--r--drivers/misc/mic/scif/scif_peer_bus.c179
-rw-r--r--drivers/misc/mic/scif/scif_peer_bus.h42
-rw-r--r--drivers/misc/mic/scif/scif_rma.c1775
-rw-r--r--drivers/misc/mic/scif/scif_rma.h464
-rw-r--r--drivers/misc/mic/scif/scif_rma_list.c291
-rw-r--r--drivers/misc/mic/scif/scif_rma_list.h57
-rw-r--r--drivers/misc/sgi-gru/gruhandles.c6
-rw-r--r--drivers/misc/sgi-gru/gruhandles.h1
-rw-r--r--drivers/misc/sgi-gru/grukdump.c16
-rw-r--r--drivers/misc/sgi-gru/grukservices.c15
-rw-r--r--drivers/misc/sgi-gru/grumain.c4
-rw-r--r--drivers/misc/sgi-gru/grutlbpurge.c25
-rw-r--r--drivers/misc/sram.c196
-rw-r--r--drivers/misc/ti-st/st_core.c18
-rw-r--r--drivers/misc/vmw_balloon.c843
-rw-r--r--drivers/misc/vmw_vmci/vmci_datagram.c3
81 files changed, 10375 insertions, 1850 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index ccccc2943f2f..22892c701c63 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -414,7 +414,7 @@ config TI_DAC7512
config VMWARE_BALLOON
tristate "VMware Balloon Driver"
- depends on X86 && HYPERVISOR_GUEST
+ depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST
help
This is VMware physical memory management driver which acts
like a "balloon" that can be inflated to reclaim physical pages
diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c
index d11187d36ddd..4f832002d116 100644
--- a/drivers/misc/ad525x_dpot-i2c.c
+++ b/drivers/misc/ad525x_dpot-i2c.c
@@ -117,4 +117,3 @@ module_i2c_driver(ad_dpot_i2c_driver);
MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
MODULE_DESCRIPTION("digital potentiometer I2C bus driver");
MODULE_LICENSE("GPL");
-MODULE_ALIAS("i2c:ad_dpot");
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
index e7353449874b..cb851c14ca4b 100644
--- a/drivers/misc/genwqe/card_base.h
+++ b/drivers/misc/genwqe/card_base.h
@@ -514,7 +514,7 @@ int __genwqe_execute_ddcb(struct genwqe_dev *cd,
/**
* __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
*
- * This version will not do address translation or any modifcation of
+ * This version will not do address translation or any modification of
* the DDCB data. It is used e.g. for the MoveFlash DDCB which is
* entirely prepared by the driver itself. That means the appropriate
* DMA addresses are already in the DDCB and do not need any
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index 6d51e5f08664..353ee0cc733d 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -203,7 +203,7 @@ struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
{
struct ddcb_requ *req;
- req = kzalloc(sizeof(*req), GFP_ATOMIC);
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return NULL;
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index 70e62d6a3231..7f1b282d7d96 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -449,7 +449,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
if (get_order(vsize) > MAX_ORDER)
return -ENOMEM;
- dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+ dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
if (dma_map == NULL)
return -ENOMEM;
@@ -785,7 +785,7 @@ static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
map_addr = (m->addr & PAGE_MASK);
map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
- dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+ dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
if (dma_map == NULL)
return -ENOMEM;
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 1ca94e6fa8fb..222367cc8c81 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -220,7 +220,8 @@ void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
if (get_order(size) > MAX_ORDER)
return NULL;
- return pci_alloc_consistent(cd->pci_dev, size, dma_handle);
+ return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
+ GFP_KERNEL);
}
void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
@@ -229,7 +230,7 @@ void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
if (vaddr == NULL)
return;
- pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle);
+ dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
}
static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index b83e3ca12a41..d6a901cd4222 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -2,7 +2,7 @@
* Driver for the HP iLO management processor.
*
* Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
- * David Altobelli <david.altobelli@hp.com>
+ * David Altobelli <david.altobelli@hpe.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -902,11 +902,11 @@ static void __exit ilo_exit(void)
MODULE_VERSION("1.4.1");
MODULE_ALIAS(ILO_NAME);
MODULE_DESCRIPTION(ILO_NAME);
-MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>");
+MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>");
MODULE_LICENSE("GPL v2");
module_param(max_ccb, uint, 0444);
-MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (16)");
+MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)");
module_init(ilo_init);
module_exit(ilo_exit);
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 9a60bd4d3c49..99635dd9dbac 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -1112,6 +1112,7 @@ static int __init init_kgdbts(void)
return configure_kgdbts();
}
+device_initcall(init_kgdbts);
static int kgdbts_get_char(void)
{
@@ -1180,10 +1181,9 @@ static struct kgdb_io kgdbts_io_ops = {
.post_exception = kgdbts_post_exp_handler,
};
-module_init(init_kgdbts);
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param here.
+ */
module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
-MODULE_DESCRIPTION("KGDB Test Suite");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Wind River Systems, Inc.");
-
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index b5abe34120b8..11fdadc68e53 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -472,7 +472,7 @@ static void lkdtm_do_action(enum ctype which)
break;
}
case CT_ACCESS_USERSPACE: {
- unsigned long user_addr, tmp;
+ unsigned long user_addr, tmp = 0;
unsigned long *ptr;
user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
@@ -483,6 +483,12 @@ static void lkdtm_do_action(enum ctype which)
return;
}
+ if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) {
+ pr_warn("copy_to_user failed\n");
+ vm_munmap(user_addr, PAGE_SIZE);
+ return;
+ }
+
ptr = (unsigned long *)user_addr;
pr_info("attempting bad read at %p\n", ptr);
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index 1e42781592d8..cd0403f09267 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -458,7 +458,7 @@ void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb)
return;
}
- if (dev->iamthif_canceled != 1) {
+ if (!dev->iamthif_canceled) {
dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE;
dev->iamthif_stall_timer = 0;
list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list);
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 3e536ca85f7d..020de5919c21 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -285,11 +285,11 @@ static struct mei_fixup {
};
/**
- * mei_cl_dev_fixup - run fixup handlers
+ * mei_cldev_fixup - run fixup handlers
*
* @cldev: me client device
*/
-void mei_cl_dev_fixup(struct mei_cl_device *cldev)
+void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev)
{
struct mei_fixup *f;
const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index eef1c6b46ad8..0b05aa938799 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -91,7 +91,7 @@ out:
* __mei_cl_recv - internal client receive (read)
*
* @cl: host client
- * @buf: buffer to send
+ * @buf: buffer to receive
* @length: buffer length
*
* Return: read size in bytes of < 0 on error
@@ -165,7 +165,7 @@ out:
}
/**
- * mei_cl_send - me device send (write)
+ * mei_cldev_send - me device send (write)
*
* @cldev: me client device
* @buf: buffer to send
@@ -173,7 +173,7 @@ out:
*
* Return: written size in bytes or < 0 on error
*/
-ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
+ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
{
struct mei_cl *cl = cldev->cl;
@@ -182,18 +182,18 @@ ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
return __mei_cl_send(cl, buf, length, 1);
}
-EXPORT_SYMBOL_GPL(mei_cl_send);
+EXPORT_SYMBOL_GPL(mei_cldev_send);
/**
- * mei_cl_recv - client receive (read)
+ * mei_cldev_recv - client receive (read)
*
* @cldev: me client device
- * @buf: buffer to send
+ * @buf: buffer to receive
* @length: buffer length
*
* Return: read size in bytes of < 0 on error
*/
-ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
+ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
{
struct mei_cl *cl = cldev->cl;
@@ -202,15 +202,15 @@ ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
return __mei_cl_recv(cl, buf, length);
}
-EXPORT_SYMBOL_GPL(mei_cl_recv);
+EXPORT_SYMBOL_GPL(mei_cldev_recv);
/**
- * mei_bus_event_work - dispatch rx event for a bus device
+ * mei_cl_bus_event_work - dispatch rx event for a bus device
* and schedule new work
*
* @work: work
*/
-static void mei_bus_event_work(struct work_struct *work)
+static void mei_cl_bus_event_work(struct work_struct *work)
{
struct mei_cl_device *cldev;
@@ -272,7 +272,7 @@ void mei_cl_bus_rx_event(struct mei_cl *cl)
}
/**
- * mei_cl_register_event_cb - register event callback
+ * mei_cldev_register_event_cb - register event callback
*
* @cldev: me client devices
* @event_cb: callback function
@@ -283,9 +283,9 @@ void mei_cl_bus_rx_event(struct mei_cl *cl)
* -EALREADY if an callback is already registered
* <0 on other errors
*/
-int mei_cl_register_event_cb(struct mei_cl_device *cldev,
- unsigned long events_mask,
- mei_cl_event_cb_t event_cb, void *context)
+int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
+ unsigned long events_mask,
+ mei_cldev_event_cb_t event_cb, void *context)
{
int ret;
@@ -296,7 +296,7 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev,
cldev->events_mask = events_mask;
cldev->event_cb = event_cb;
cldev->event_context = context;
- INIT_WORK(&cldev->event_work, mei_bus_event_work);
+ INIT_WORK(&cldev->event_work, mei_cl_bus_event_work);
if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
ret = mei_cl_read_start(cldev->cl, 0, NULL);
@@ -314,42 +314,81 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev,
return 0;
}
-EXPORT_SYMBOL_GPL(mei_cl_register_event_cb);
+EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb);
/**
- * mei_cl_get_drvdata - driver data getter
+ * mei_cldev_get_drvdata - driver data getter
*
* @cldev: mei client device
*
* Return: driver private data
*/
-void *mei_cl_get_drvdata(const struct mei_cl_device *cldev)
+void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev)
{
return dev_get_drvdata(&cldev->dev);
}
-EXPORT_SYMBOL_GPL(mei_cl_get_drvdata);
+EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata);
/**
- * mei_cl_set_drvdata - driver data setter
+ * mei_cldev_set_drvdata - driver data setter
*
* @cldev: mei client device
* @data: data to store
*/
-void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data)
+void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data)
{
dev_set_drvdata(&cldev->dev, data);
}
-EXPORT_SYMBOL_GPL(mei_cl_set_drvdata);
+EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata);
+
+/**
+ * mei_cldev_uuid - return uuid of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client uuid
+ */
+const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev)
+{
+ return mei_me_cl_uuid(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_uuid);
+
+/**
+ * mei_cldev_ver - return protocol version of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client protocol version
+ */
+u8 mei_cldev_ver(const struct mei_cl_device *cldev)
+{
+ return mei_me_cl_ver(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_ver);
+
+/**
+ * mei_cldev_enabled - check whether the device is enabled
+ *
+ * @cldev: mei client device
+ *
+ * Return: true if me client is initialized and connected
+ */
+bool mei_cldev_enabled(struct mei_cl_device *cldev)
+{
+ return cldev->cl && mei_cl_is_connected(cldev->cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_enabled);
/**
- * mei_cl_enable_device - enable me client device
+ * mei_cldev_enable_device - enable me client device
* create connection with me client
*
* @cldev: me client device
*
* Return: 0 on success and < 0 on error
*/
-int mei_cl_enable_device(struct mei_cl_device *cldev)
+int mei_cldev_enable(struct mei_cl_device *cldev)
{
struct mei_device *bus = cldev->bus;
struct mei_cl *cl;
@@ -389,17 +428,17 @@ out:
return ret;
}
-EXPORT_SYMBOL_GPL(mei_cl_enable_device);
+EXPORT_SYMBOL_GPL(mei_cldev_enable);
/**
- * mei_cl_disable_device - disable me client device
+ * mei_cldev_disable - disable me client device
* disconnect form the me client
*
* @cldev: me client device
*
* Return: 0 on success and < 0 on error
*/
-int mei_cl_disable_device(struct mei_cl_device *cldev)
+int mei_cldev_disable(struct mei_cl_device *cldev)
{
struct mei_device *bus;
struct mei_cl *cl;
@@ -437,7 +476,7 @@ out:
mutex_unlock(&bus->device_lock);
return err;
}
-EXPORT_SYMBOL_GPL(mei_cl_disable_device);
+EXPORT_SYMBOL_GPL(mei_cldev_disable);
/**
* mei_cl_device_find - find matching entry in the driver id table
@@ -453,17 +492,26 @@ struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev,
{
const struct mei_cl_device_id *id;
const uuid_le *uuid;
+ u8 version;
+ bool match;
uuid = mei_me_cl_uuid(cldev->me_cl);
+ version = mei_me_cl_ver(cldev->me_cl);
id = cldrv->id_table;
while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) {
if (!uuid_le_cmp(*uuid, id->uuid)) {
+ match = true;
- if (!cldev->name[0])
- return id;
+ if (cldev->name[0])
+ if (strncmp(cldev->name, id->name,
+ sizeof(id->name)))
+ match = false;
- if (!strncmp(cldev->name, id->name, sizeof(id->name)))
+ if (id->version != MEI_CL_VERSION_ANY)
+ if (id->version != version)
+ match = false;
+ if (match)
return id;
}
@@ -590,6 +638,19 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *a,
}
static DEVICE_ATTR_RO(uuid);
+static ssize_t version_show(struct device *dev, struct device_attribute *a,
+ char *buf)
+{
+ struct mei_cl_device *cldev = to_mei_cl_device(dev);
+ u8 version = mei_me_cl_ver(cldev->me_cl);
+ size_t len;
+
+ len = snprintf(buf, PAGE_SIZE, "%02X", version);
+
+ return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(version);
+
static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
char *buf)
{
@@ -597,20 +658,19 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
size_t len;
- len = snprintf(buf, PAGE_SIZE, "mei:%s:" MEI_CL_UUID_FMT ":",
- cldev->name, MEI_CL_UUID_ARGS(uuid->b));
-
+ len = snprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid);
return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
}
static DEVICE_ATTR_RO(modalias);
-static struct attribute *mei_cl_dev_attrs[] = {
+static struct attribute *mei_cldev_attrs[] = {
&dev_attr_name.attr,
&dev_attr_uuid.attr,
+ &dev_attr_version.attr,
&dev_attr_modalias.attr,
NULL,
};
-ATTRIBUTE_GROUPS(mei_cl_dev);
+ATTRIBUTE_GROUPS(mei_cldev);
/**
* mei_cl_device_uevent - me client bus uevent handler
@@ -624,6 +684,10 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
{
struct mei_cl_device *cldev = to_mei_cl_device(dev);
const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+ u8 version = mei_me_cl_ver(cldev->me_cl);
+
+ if (add_uevent_var(env, "MEI_CL_VERSION=%d", version))
+ return -ENOMEM;
if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid))
return -ENOMEM;
@@ -631,8 +695,8 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name))
return -ENOMEM;
- if (add_uevent_var(env, "MODALIAS=mei:%s:" MEI_CL_UUID_FMT ":",
- cldev->name, MEI_CL_UUID_ARGS(uuid->b)))
+ if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:",
+ cldev->name, uuid, version))
return -ENOMEM;
return 0;
@@ -640,7 +704,7 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
static struct bus_type mei_cl_bus_type = {
.name = "mei",
- .dev_groups = mei_cl_dev_groups,
+ .dev_groups = mei_cldev_groups,
.match = mei_cl_device_match,
.probe = mei_cl_device_probe,
.remove = mei_cl_device_remove,
@@ -661,7 +725,7 @@ static void mei_dev_bus_put(struct mei_device *bus)
put_device(bus->dev);
}
-static void mei_cl_dev_release(struct device *dev)
+static void mei_cl_bus_dev_release(struct device *dev)
{
struct mei_cl_device *cldev = to_mei_cl_device(dev);
@@ -674,19 +738,32 @@ static void mei_cl_dev_release(struct device *dev)
}
static struct device_type mei_cl_device_type = {
- .release = mei_cl_dev_release,
+ .release = mei_cl_bus_dev_release,
};
/**
- * mei_cl_dev_alloc - initialize and allocate mei client device
+ * mei_cl_bus_set_name - set device name for me client device
+ *
+ * @cldev: me client device
+ */
+static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev)
+{
+ dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X",
+ cldev->name,
+ mei_me_cl_uuid(cldev->me_cl),
+ mei_me_cl_ver(cldev->me_cl));
+}
+
+/**
+ * mei_cl_bus_dev_alloc - initialize and allocate mei client device
*
* @bus: mei device
* @me_cl: me client
*
* Return: allocated device structur or NULL on allocation failure
*/
-static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus,
- struct mei_me_client *me_cl)
+static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus,
+ struct mei_me_client *me_cl)
{
struct mei_cl_device *cldev;
@@ -700,6 +777,7 @@ static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus,
cldev->dev.type = &mei_cl_device_type;
cldev->bus = mei_dev_bus_get(bus);
cldev->me_cl = mei_me_cl_get(me_cl);
+ mei_cl_bus_set_name(cldev);
cldev->is_added = 0;
INIT_LIST_HEAD(&cldev->bus_list);
@@ -715,15 +793,15 @@ static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus,
*
* Return: true if the device is eligible for enumeration
*/
-static bool mei_cl_dev_setup(struct mei_device *bus,
- struct mei_cl_device *cldev)
+static bool mei_cl_bus_dev_setup(struct mei_device *bus,
+ struct mei_cl_device *cldev)
{
cldev->do_match = 1;
- mei_cl_dev_fixup(cldev);
+ mei_cl_bus_dev_fixup(cldev);
+ /* the device name can change during fix up */
if (cldev->do_match)
- dev_set_name(&cldev->dev, "mei:%s:%pUl",
- cldev->name, mei_me_cl_uuid(cldev->me_cl));
+ mei_cl_bus_set_name(cldev);
return cldev->do_match == 1;
}
@@ -739,7 +817,9 @@ static int mei_cl_bus_dev_add(struct mei_cl_device *cldev)
{
int ret;
- dev_dbg(cldev->bus->dev, "adding %pUL\n", mei_me_cl_uuid(cldev->me_cl));
+ dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n",
+ mei_me_cl_uuid(cldev->me_cl),
+ mei_me_cl_ver(cldev->me_cl));
ret = device_add(&cldev->dev);
if (!ret)
cldev->is_added = 1;
@@ -762,17 +842,20 @@ static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev)
* mei_cl_bus_dev_destroy - destroy me client devices object
*
* @cldev: me client device
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
*/
static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev)
{
+
+ WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock));
+
if (!cldev->is_added)
return;
device_del(&cldev->dev);
- mutex_lock(&cldev->bus->cl_bus_lock);
list_del_init(&cldev->bus_list);
- mutex_unlock(&cldev->bus->cl_bus_lock);
cldev->is_added = 0;
put_device(&cldev->dev);
@@ -798,35 +881,40 @@ void mei_cl_bus_remove_devices(struct mei_device *bus)
{
struct mei_cl_device *cldev, *next;
+ mutex_lock(&bus->cl_bus_lock);
list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list)
mei_cl_bus_remove_device(cldev);
+ mutex_unlock(&bus->cl_bus_lock);
}
/**
- * mei_cl_dev_init - allocate and initializes an mei client devices
+ * mei_cl_bus_dev_init - allocate and initializes an mei client devices
* based on me client
*
* @bus: mei device
* @me_cl: me client
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
*/
-static void mei_cl_dev_init(struct mei_device *bus, struct mei_me_client *me_cl)
+static void mei_cl_bus_dev_init(struct mei_device *bus,
+ struct mei_me_client *me_cl)
{
struct mei_cl_device *cldev;
+ WARN_ON(!mutex_is_locked(&bus->cl_bus_lock));
+
dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl));
if (me_cl->bus_added)
return;
- cldev = mei_cl_dev_alloc(bus, me_cl);
+ cldev = mei_cl_bus_dev_alloc(bus, me_cl);
if (!cldev)
return;
- mutex_lock(&cldev->bus->cl_bus_lock);
me_cl->bus_added = true;
list_add_tail(&cldev->bus_list, &bus->device_list);
- mutex_unlock(&cldev->bus->cl_bus_lock);
}
@@ -841,12 +929,13 @@ void mei_cl_bus_rescan(struct mei_device *bus)
struct mei_cl_device *cldev, *n;
struct mei_me_client *me_cl;
+ mutex_lock(&bus->cl_bus_lock);
+
down_read(&bus->me_clients_rwsem);
list_for_each_entry(me_cl, &bus->me_clients, list)
- mei_cl_dev_init(bus, me_cl);
+ mei_cl_bus_dev_init(bus, me_cl);
up_read(&bus->me_clients_rwsem);
- mutex_lock(&bus->cl_bus_lock);
list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) {
if (!mei_me_cl_is_active(cldev->me_cl)) {
@@ -857,7 +946,7 @@ void mei_cl_bus_rescan(struct mei_device *bus)
if (cldev->is_added)
continue;
- if (mei_cl_dev_setup(bus, cldev))
+ if (mei_cl_bus_dev_setup(bus, cldev))
mei_cl_bus_dev_add(cldev);
else {
list_del_init(&cldev->bus_list);
@@ -869,7 +958,8 @@ void mei_cl_bus_rescan(struct mei_device *bus)
dev_dbg(bus->dev, "rescan end");
}
-int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner)
+int __mei_cldev_driver_register(struct mei_cl_driver *cldrv,
+ struct module *owner)
{
int err;
@@ -885,15 +975,15 @@ int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner)
return 0;
}
-EXPORT_SYMBOL_GPL(__mei_cl_driver_register);
+EXPORT_SYMBOL_GPL(__mei_cldev_driver_register);
-void mei_cl_driver_unregister(struct mei_cl_driver *cldrv)
+void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv)
{
driver_unregister(&cldrv->driver);
pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name);
}
-EXPORT_SYMBOL_GPL(mei_cl_driver_unregister);
+EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister);
int __init mei_cl_bus_init(void)
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 1c7cad07d731..04e1aa39243f 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -68,6 +68,18 @@ static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl)
return &me_cl->props.protocol_name;
}
+/**
+ * mei_me_cl_ver - return me client protocol version
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol version
+ */
+static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl)
+{
+ return me_cl->props.protocol_version;
+}
+
/*
* MEI IO Functions
*/
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index 8504dbeacd3b..a138d8a27ab5 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -215,7 +215,7 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name)
f = debugfs_create_file("active", S_IRUSR, dir,
dev, &mei_dbgfs_fops_active);
if (!f) {
- dev_err(dev->dev, "meclients: registration failed\n");
+ dev_err(dev->dev, "active: registration failed\n");
goto err;
}
f = debugfs_create_file("devstate", S_IRUSR, dir,
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 6d7c188fb65c..e7b7aad0999b 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -281,7 +281,7 @@ int mei_hbm_start_req(struct mei_device *dev)
return 0;
}
-/*
+/**
* mei_hbm_enum_clients_req - sends enumeration client request message.
*
* @dev: the device structure
@@ -314,7 +314,7 @@ static int mei_hbm_enum_clients_req(struct mei_device *dev)
return 0;
}
-/*
+/**
* mei_hbm_me_cl_add - add new me client to the list
*
* @dev: the device structure
@@ -569,7 +569,7 @@ static int mei_hbm_prop_req(struct mei_device *dev)
return 0;
}
-/*
+/**
* mei_hbm_pg - sends pg command
*
* @dev: the device structure
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index 65511d39d89b..25b1997a62cb 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -150,7 +150,7 @@ static inline u32 mei_me_d0i3c_read(const struct mei_device *dev)
u32 reg;
reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C);
- trace_mei_reg_read(dev->dev, "H_D0I3C", H_CSR, reg);
+ trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg);
return reg;
}
@@ -163,7 +163,7 @@ static inline u32 mei_me_d0i3c_read(const struct mei_device *dev)
*/
static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg)
{
- trace_mei_reg_write(dev->dev, "H_D0I3C", H_CSR, reg);
+ trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg);
mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg);
}
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index e374661652cd..3edafc8d3ad4 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -329,10 +329,10 @@ void mei_stop(struct mei_device *dev)
{
dev_dbg(dev->dev, "stopping the device.\n");
- mei_cancel_work(dev);
-
mei_cl_bus_remove_devices(dev);
+ mei_cancel_work(dev);
+
mutex_lock(&dev->device_lock);
mei_wd_stop(dev);
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index c418d7888994..64b568a0268d 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -21,6 +21,7 @@
#include <linux/fs.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
+#include <linux/pm_runtime.h>
#include <linux/mei.h>
@@ -147,6 +148,9 @@ int mei_cl_irq_read_msg(struct mei_cl *cl,
cb->read_time = jiffies;
cl_dbg(dev, cl, "completed read length = %lu\n", cb->buf_idx);
list_move_tail(&cb->list, &complete_list->list);
+ } else {
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_request_autosuspend(dev->dev);
}
out:
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index d74b6aa8ae27..4250555d5e72 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -275,32 +275,33 @@ struct mei_cl {
struct mei_cl_device *cldev;
};
-/** struct mei_hw_ops
+/**
+ * struct mei_hw_ops - hw specific ops
*
* @host_is_ready : query for host readiness
-
+ *
* @hw_is_ready : query if hw is ready
* @hw_reset : reset hw
* @hw_start : start hw after reset
* @hw_config : configure hw
-
+ *
* @fw_status : get fw status registers
* @pg_state : power gating state of the device
* @pg_in_transition : is device now in pg transition
* @pg_is_enabled : is power gating enabled
-
+ *
* @intr_clear : clear pending interrupts
* @intr_enable : enable interrupts
* @intr_disable : disable interrupts
-
+ *
* @hbuf_free_slots : query for write buffer empty slots
* @hbuf_is_ready : query if write buffer is empty
* @hbuf_max_len : query for write buffer max len
-
+ *
* @write : write a message to FW
-
+ *
* @rdbuf_full_slots : query how many slots are filled
-
+ *
* @read_hdr : get first 4 bytes (header)
* @read : read a buffer from the FW
*/
@@ -340,7 +341,7 @@ struct mei_hw_ops {
/* MEI bus API*/
void mei_cl_bus_rescan(struct mei_device *bus);
-void mei_cl_dev_fixup(struct mei_cl_device *dev);
+void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
bool blocking);
ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length);
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index e9f2f56c370d..40677df7f996 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -36,7 +36,7 @@ comment "Intel MIC Host Driver"
config INTEL_MIC_HOST
tristate "Intel MIC Host Driver"
- depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS
+ depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
select VHOST_RING
help
This enables Host Driver support for the Intel Many Integrated
@@ -56,7 +56,7 @@ comment "Intel MIC Card Driver"
config INTEL_MIC_CARD
tristate "Intel MIC Card Driver"
- depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS
+ depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
select VIRTIO
help
This enables card driver support for the Intel Many Integrated
@@ -74,7 +74,8 @@ comment "SCIF Driver"
config SCIF
tristate "SCIF Driver"
- depends on 64BIT && PCI && X86 && SCIF_BUS
+ depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
+ select IOMMU_IOVA
help
This enables SCIF Driver support for the Intel Many Integrated
Core (MIC) family of PCIe form factor coprocessor devices that
@@ -88,3 +89,21 @@ config SCIF
More information about the Intel MIC family as well as the Linux
OS and tools for MIC to use with this driver are available from
<http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Coprocessor State Management (COSM) Drivers"
+
+config MIC_COSM
+ tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
+ depends on 64BIT && PCI && X86 && SCIF
+ help
+ This enables COSM driver support for the Intel Many
+ Integrated Core (MIC) family of PCIe form factor coprocessor
+ devices. COSM drivers implement functions such as boot,
+ shutdown, reset and reboot of MIC devices.
+
+ If you are building a host kernel with an Intel MIC device then
+ say M (recommended) or Y, else say N. If unsure say N.
+
+ More information about the Intel MIC family as well as the Linux
+ OS and tools for MIC to use with this driver are available from
+ <http://software.intel.com/en-us/mic-developer>.
diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile
index a74042c58649..e288a1106738 100644
--- a/drivers/misc/mic/Makefile
+++ b/drivers/misc/mic/Makefile
@@ -6,3 +6,5 @@ obj-$(CONFIG_INTEL_MIC_HOST) += host/
obj-$(CONFIG_INTEL_MIC_CARD) += card/
obj-y += bus/
obj-$(CONFIG_SCIF) += scif/
+obj-$(CONFIG_MIC_COSM) += cosm/
+obj-$(CONFIG_MIC_COSM) += cosm_client/
diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile
index 1ed37e234c96..761842b0d0bb 100644
--- a/drivers/misc/mic/bus/Makefile
+++ b/drivers/misc/mic/bus/Makefile
@@ -4,3 +4,4 @@
#
obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
obj-$(CONFIG_SCIF_BUS) += scif_bus.o
+obj-$(CONFIG_MIC_COSM) += cosm_bus.o
diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c
new file mode 100644
index 000000000000..d31d6c6e6cb1
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.c
@@ -0,0 +1,141 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include "cosm_bus.h"
+
+/* Unique numbering for cosm devices. */
+static DEFINE_IDA(cosm_index_ida);
+
+static int cosm_dev_probe(struct device *d)
+{
+ struct cosm_device *dev = dev_to_cosm(d);
+ struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+ return drv->probe(dev);
+}
+
+static int cosm_dev_remove(struct device *d)
+{
+ struct cosm_device *dev = dev_to_cosm(d);
+ struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+ drv->remove(dev);
+ return 0;
+}
+
+static struct bus_type cosm_bus = {
+ .name = "cosm_bus",
+ .probe = cosm_dev_probe,
+ .remove = cosm_dev_remove,
+};
+
+int cosm_register_driver(struct cosm_driver *driver)
+{
+ driver->driver.bus = &cosm_bus;
+ return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_register_driver);
+
+void cosm_unregister_driver(struct cosm_driver *driver)
+{
+ driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_driver);
+
+static inline void cosm_release_dev(struct device *d)
+{
+ struct cosm_device *cdev = dev_to_cosm(d);
+
+ kfree(cdev);
+}
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops)
+{
+ struct cosm_device *cdev;
+ int ret;
+
+ cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+ if (!cdev)
+ return ERR_PTR(-ENOMEM);
+
+ cdev->dev.parent = pdev;
+ cdev->dev.release = cosm_release_dev;
+ cdev->hw_ops = hw_ops;
+ dev_set_drvdata(&cdev->dev, cdev);
+ cdev->dev.bus = &cosm_bus;
+
+ /* Assign a unique device index and hence name */
+ ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto free_cdev;
+
+ cdev->index = ret;
+ cdev->dev.id = ret;
+ dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index);
+
+ ret = device_register(&cdev->dev);
+ if (ret)
+ goto ida_remove;
+ return cdev;
+ida_remove:
+ ida_simple_remove(&cosm_index_ida, cdev->index);
+free_cdev:
+ put_device(&cdev->dev);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(cosm_register_device);
+
+void cosm_unregister_device(struct cosm_device *dev)
+{
+ int index = dev->index; /* save for after device release */
+
+ device_unregister(&dev->dev);
+ ida_simple_remove(&cosm_index_ida, index);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_device);
+
+struct cosm_device *cosm_find_cdev_by_id(int id)
+{
+ struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL);
+
+ return dev ? container_of(dev, struct cosm_device, dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id);
+
+static int __init cosm_init(void)
+{
+ return bus_register(&cosm_bus);
+}
+
+static void __exit cosm_exit(void)
+{
+ bus_unregister(&cosm_bus);
+ ida_destroy(&cosm_index_ida);
+}
+
+core_initcall(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h
new file mode 100644
index 000000000000..f7c57f266916
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.h
@@ -0,0 +1,134 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#ifndef _COSM_BUS_H_
+#define _COSM_BUS_H_
+
+#include <linux/scif.h>
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+/**
+ * cosm_device - representation of a cosm device
+ *
+ * @attr_group: Pointer to list of sysfs attribute groups.
+ * @sdev: Device for sysfs entries.
+ * @state: MIC state.
+ * @shutdown_status: MIC status reported by card for shutdown/crashes.
+ * @shutdown_status_int: Internal shutdown status maintained by the driver
+ * @cosm_mutex: Mutex for synchronizing access to data structures.
+ * @reset_trigger_work: Work for triggering reset requests.
+ * @scif_work: Work for handling per device SCIF connections
+ * @cmdline: Kernel command line.
+ * @firmware: Firmware file name.
+ * @ramdisk: Ramdisk file name.
+ * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
+ * @log_buf_addr: Log buffer address for MIC.
+ * @log_buf_len: Log buffer length address for MIC.
+ * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
+ * @hw_ops: the hardware bus ops for this device.
+ * @dev: underlying device.
+ * @index: unique position on the cosm bus
+ * @dbg_dir: debug fs directory
+ * @newepd: new endpoint from scif accept to be assigned to this cdev
+ * @epd: SCIF endpoint for this cdev
+ * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev
+ * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification
+ */
+struct cosm_device {
+ const struct attribute_group **attr_group;
+ struct device *sdev;
+ u8 state;
+ u8 shutdown_status;
+ u8 shutdown_status_int;
+ struct mutex cosm_mutex;
+ struct work_struct reset_trigger_work;
+ struct work_struct scif_work;
+ char *cmdline;
+ char *firmware;
+ char *ramdisk;
+ char *bootmode;
+ void *log_buf_addr;
+ int *log_buf_len;
+ struct kernfs_node *state_sysfs;
+ struct cosm_hw_ops *hw_ops;
+ struct device dev;
+ int index;
+ struct dentry *dbg_dir;
+ scif_epd_t newepd;
+ scif_epd_t epd;
+ bool heartbeat_watchdog_enable;
+ bool sysfs_heartbeat_enable;
+};
+
+/**
+ * cosm_driver - operations for a cosm driver
+ *
+ * @driver: underlying device driver (populate name and owner).
+ * @probe: the function to call when a device is found. Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct cosm_driver {
+ struct device_driver driver;
+ int (*probe)(struct cosm_device *dev);
+ void (*remove)(struct cosm_device *dev);
+};
+
+/**
+ * cosm_hw_ops - cosm bus ops
+ *
+ * @reset: trigger MIC reset
+ * @force_reset: force MIC reset
+ * @post_reset: inform MIC reset is complete
+ * @ready: is MIC ready for OS download
+ * @start: boot MIC
+ * @stop: prepare MIC for reset
+ * @family: return MIC HW family string
+ * @stepping: return MIC HW stepping string
+ * @aper: return MIC PCIe aperture
+ */
+struct cosm_hw_ops {
+ void (*reset)(struct cosm_device *cdev);
+ void (*force_reset)(struct cosm_device *cdev);
+ void (*post_reset)(struct cosm_device *cdev, enum mic_states state);
+ bool (*ready)(struct cosm_device *cdev);
+ int (*start)(struct cosm_device *cdev, int id);
+ void (*stop)(struct cosm_device *cdev, bool force);
+ ssize_t (*family)(struct cosm_device *cdev, char *buf);
+ ssize_t (*stepping)(struct cosm_device *cdev, char *buf);
+ struct mic_mw *(*aper)(struct cosm_device *cdev);
+};
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops);
+void cosm_unregister_device(struct cosm_device *dev);
+int cosm_register_driver(struct cosm_driver *drv);
+void cosm_unregister_driver(struct cosm_driver *drv);
+struct cosm_device *cosm_find_cdev_by_id(int id);
+
+static inline struct cosm_device *dev_to_cosm(struct device *dev)
+{
+ return container_of(dev, struct cosm_device, dev);
+}
+
+static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv)
+{
+ return container_of(drv, struct cosm_driver, driver);
+}
+#endif /* _COSM_BUS_H */
diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c
index 961ae90aae47..be37890abb93 100644
--- a/drivers/misc/mic/bus/mic_bus.c
+++ b/drivers/misc/mic/bus/mic_bus.c
@@ -25,9 +25,6 @@
#include <linux/idr.h>
#include <linux/mic_bus.h>
-/* Unique numbering for mbus devices. */
-static DEFINE_IDA(mbus_index_ida);
-
static ssize_t device_show(struct device *d,
struct device_attribute *attr, char *buf)
{
@@ -147,7 +144,8 @@ static void mbus_release_dev(struct device *d)
struct mbus_device *
mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
- struct mbus_hw_ops *hw_ops, void __iomem *mmio_va)
+ struct mbus_hw_ops *hw_ops, int index,
+ void __iomem *mmio_va)
{
int ret;
struct mbus_device *mbdev;
@@ -166,13 +164,7 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
mbdev->dev.release = mbus_release_dev;
mbdev->hw_ops = hw_ops;
mbdev->dev.bus = &mic_bus;
-
- /* Assign a unique device index and hence name. */
- ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL);
- if (ret < 0)
- goto free_mbdev;
-
- mbdev->index = ret;
+ mbdev->index = index;
dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index);
/*
* device_register() causes the bus infrastructure to look for a
@@ -180,22 +172,17 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
*/
ret = device_register(&mbdev->dev);
if (ret)
- goto ida_remove;
+ goto free_mbdev;
return mbdev;
-ida_remove:
- ida_simple_remove(&mbus_index_ida, mbdev->index);
free_mbdev:
- kfree(mbdev);
+ put_device(&mbdev->dev);
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(mbus_register_device);
void mbus_unregister_device(struct mbus_device *mbdev)
{
- int index = mbdev->index; /* save for after device release */
-
device_unregister(&mbdev->dev);
- ida_simple_remove(&mbus_index_ida, index);
}
EXPORT_SYMBOL_GPL(mbus_unregister_device);
@@ -207,7 +194,6 @@ static int __init mbus_init(void)
static void __exit mbus_exit(void)
{
bus_unregister(&mic_bus);
- ida_destroy(&mbus_index_ida);
}
core_initcall(mbus_init);
diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c
index 2da7ceed015d..ff6e01c25810 100644
--- a/drivers/misc/mic/bus/scif_bus.c
+++ b/drivers/misc/mic/bus/scif_bus.c
@@ -28,7 +28,6 @@ static ssize_t device_show(struct device *d,
return sprintf(buf, "0x%04x\n", dev->id.device);
}
-
static DEVICE_ATTR_RO(device);
static ssize_t vendor_show(struct device *d,
@@ -38,7 +37,6 @@ static ssize_t vendor_show(struct device *d,
return sprintf(buf, "0x%04x\n", dev->id.vendor);
}
-
static DEVICE_ATTR_RO(vendor);
static ssize_t modalias_show(struct device *d,
@@ -49,7 +47,6 @@ static ssize_t modalias_show(struct device *d,
return sprintf(buf, "scif:d%08Xv%08X\n",
dev->id.device, dev->id.vendor);
}
-
static DEVICE_ATTR_RO(modalias);
static struct attribute *scif_dev_attrs[] = {
@@ -144,7 +141,8 @@ struct scif_hw_dev *
scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
struct mic_mw *mmio, struct mic_mw *aper, void *dp,
- void __iomem *rdp, struct dma_chan **chan, int num_chan)
+ void __iomem *rdp, struct dma_chan **chan, int num_chan,
+ bool card_rel_da)
{
int ret;
struct scif_hw_dev *sdev;
@@ -171,6 +169,7 @@ scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
dma_set_mask(&sdev->dev, DMA_BIT_MASK(64));
sdev->dma_ch = chan;
sdev->num_dma_ch = num_chan;
+ sdev->card_rel_da = card_rel_da;
dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode);
/*
* device_register() causes the bus infrastructure to look for a
@@ -181,7 +180,7 @@ scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
goto free_sdev;
return sdev;
free_sdev:
- kfree(sdev);
+ put_device(&sdev->dev);
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(scif_register_device);
diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h
index 335a228a8236..94f29ac608b6 100644
--- a/drivers/misc/mic/bus/scif_bus.h
+++ b/drivers/misc/mic/bus/scif_bus.h
@@ -46,6 +46,8 @@ struct scif_hw_dev_id {
* @rdp - Remote device page
* @dma_ch - Array of DMA channels
* @num_dma_ch - Number of DMA channels available
+ * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine
+ * are relative to the card point of view
*/
struct scif_hw_dev {
struct scif_hw_ops *hw_ops;
@@ -59,6 +61,7 @@ struct scif_hw_dev {
void __iomem *rdp;
struct dma_chan **dma_ch;
int num_dma_ch;
+ bool card_rel_da;
};
/**
@@ -114,7 +117,8 @@ scif_register_device(struct device *pdev, int id,
struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
struct mic_mw *mmio, struct mic_mw *aper,
void *dp, void __iomem *rdp,
- struct dma_chan **chan, int num_chan);
+ struct dma_chan **chan, int num_chan,
+ bool card_rel_da);
void scif_unregister_device(struct scif_hw_dev *sdev);
static inline struct scif_hw_dev *dev_to_scif(struct device *dev)
diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c
index 6338908b2252..d0edaf7e0cd5 100644
--- a/drivers/misc/mic/card/mic_device.c
+++ b/drivers/misc/mic/card/mic_device.c
@@ -37,71 +37,6 @@
#include "mic_virtio.h"
static struct mic_driver *g_drv;
-static struct mic_irq *shutdown_cookie;
-
-static void mic_notify_host(u8 state)
-{
- struct mic_driver *mdrv = g_drv;
- struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
- iowrite8(state, &bootparam->shutdown_status);
- dev_dbg(mdrv->dev, "%s %d system_state %d\n",
- __func__, __LINE__, state);
- mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db));
-}
-
-static int mic_panic_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct mic_driver *mdrv = g_drv;
- struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
- iowrite8(-1, &bootparam->h2c_config_db);
- iowrite8(-1, &bootparam->h2c_shutdown_db);
- mic_notify_host(MIC_CRASHED);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block mic_panic = {
- .notifier_call = mic_panic_event,
-};
-
-static irqreturn_t mic_shutdown_isr(int irq, void *data)
-{
- struct mic_driver *mdrv = g_drv;
- struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
- mic_ack_interrupt(&g_drv->mdev);
- if (ioread8(&bootparam->shutdown_card))
- orderly_poweroff(true);
- return IRQ_HANDLED;
-}
-
-static int mic_shutdown_init(void)
-{
- int rc = 0;
- struct mic_driver *mdrv = g_drv;
- struct mic_bootparam __iomem *bootparam = mdrv->dp;
- int shutdown_db;
-
- shutdown_db = mic_next_card_db();
- shutdown_cookie = mic_request_card_irq(mic_shutdown_isr, NULL,
- "Shutdown", mdrv, shutdown_db);
- if (IS_ERR(shutdown_cookie))
- rc = PTR_ERR(shutdown_cookie);
- else
- iowrite8(shutdown_db, &bootparam->h2c_shutdown_db);
- return rc;
-}
-
-static void mic_shutdown_uninit(void)
-{
- struct mic_driver *mdrv = g_drv;
- struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
- iowrite8(-1, &bootparam->h2c_shutdown_db);
- mic_free_card_irq(shutdown_cookie, mdrv);
-}
static int __init mic_dp_init(void)
{
@@ -359,11 +294,7 @@ int __init mic_driver_init(struct mic_driver *mdrv)
u8 node_id;
g_drv = mdrv;
- /*
- * Unloading the card module is not supported. The MIC card module
- * handles fundamental operations like host/card initiated shutdowns
- * and informing the host about card crashes and cannot be unloaded.
- */
+ /* Unloading the card module is not supported. */
if (!try_module_get(mdrv->dev->driver->owner)) {
rc = -ENODEV;
goto done;
@@ -374,12 +305,9 @@ int __init mic_driver_init(struct mic_driver *mdrv)
rc = mic_init_irq();
if (rc)
goto dp_uninit;
- rc = mic_shutdown_init();
- if (rc)
- goto irq_uninit;
if (!mic_request_dma_chans(mdrv)) {
rc = -ENODEV;
- goto shutdown_uninit;
+ goto irq_uninit;
}
rc = mic_devices_init(mdrv);
if (rc)
@@ -390,21 +318,18 @@ int __init mic_driver_init(struct mic_driver *mdrv)
NULL, &scif_hw_ops,
0, node_id, &mdrv->mdev.mmio, NULL,
NULL, mdrv->dp, mdrv->dma_ch,
- mdrv->num_dma_ch);
+ mdrv->num_dma_ch, true);
if (IS_ERR(mdrv->scdev)) {
rc = PTR_ERR(mdrv->scdev);
goto device_uninit;
}
mic_create_card_debug_dir(mdrv);
- atomic_notifier_chain_register(&panic_notifier_list, &mic_panic);
done:
return rc;
device_uninit:
mic_devices_uninit(mdrv);
dma_free:
mic_free_dma_chans(mdrv);
-shutdown_uninit:
- mic_shutdown_uninit();
irq_uninit:
mic_uninit_irq();
dp_uninit:
@@ -425,13 +350,6 @@ void mic_driver_uninit(struct mic_driver *mdrv)
scif_unregister_device(mdrv->scdev);
mic_devices_uninit(mdrv);
mic_free_dma_chans(mdrv);
- /*
- * Inform the host about the shutdown status i.e. poweroff/restart etc.
- * The module cannot be unloaded so the only code path to call
- * mic_devices_uninit(..) is the shutdown callback.
- */
- mic_notify_host(system_state);
- mic_shutdown_uninit();
mic_uninit_irq();
mic_dp_uninit();
module_put(mdrv->dev->driver->owner);
diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c
index 77fd41781c2e..b2958ce2368c 100644
--- a/drivers/misc/mic/card/mic_x100.c
+++ b/drivers/misc/mic/card/mic_x100.c
@@ -261,7 +261,7 @@ static int __init mic_probe(struct platform_device *pdev)
mic_hw_intr_init(mdrv);
platform_set_drvdata(pdev, mdrv);
mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC,
- NULL, &mbus_hw_ops,
+ NULL, &mbus_hw_ops, 0,
mdrv->mdev.mmio.va);
if (IS_ERR(mdrv->dma_mbdev)) {
rc = PTR_ERR(mdrv->dma_mbdev);
diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h
index 0b58c46045dc..50776772ebdf 100644
--- a/drivers/misc/mic/common/mic_dev.h
+++ b/drivers/misc/mic/common/mic_dev.h
@@ -21,6 +21,19 @@
#ifndef __MIC_DEV_H__
#define __MIC_DEV_H__
+/* The maximum number of MIC devices supported in a single host system. */
+#define MIC_MAX_NUM_DEVS 128
+
+/**
+ * enum mic_hw_family - The hardware family to which a device belongs.
+ */
+enum mic_hw_family {
+ MIC_FAMILY_X100 = 0,
+ MIC_FAMILY_X200,
+ MIC_FAMILY_UNKNOWN,
+ MIC_FAMILY_LAST
+};
+
/**
* struct mic_mw - MIC memory window
*
diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile
new file mode 100644
index 000000000000..b85d4d49df46
--- /dev/null
+++ b/drivers/misc/mic/cosm/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile - Intel MIC Coprocessor State Management (COSM) Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += mic_cosm.o
+
+mic_cosm-objs := cosm_main.o
+mic_cosm-objs += cosm_debugfs.o
+mic_cosm-objs += cosm_sysfs.o
+mic_cosm-objs += cosm_scif_server.o
diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c
new file mode 100644
index 000000000000..216cb3cd2fe3
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_debugfs.c
@@ -0,0 +1,156 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "cosm_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *cosm_dbg;
+
+/**
+ * cosm_log_buf_show - Display MIC kernel log buffer
+ *
+ * log_buf addr/len is read from System.map by user space
+ * and populated in sysfs entries.
+ */
+static int cosm_log_buf_show(struct seq_file *s, void *unused)
+{
+ void __iomem *log_buf_va;
+ int __iomem *log_buf_len_va;
+ struct cosm_device *cdev = s->private;
+ void *kva;
+ int size;
+ u64 aper_offset;
+
+ if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len)
+ goto done;
+
+ mutex_lock(&cdev->cosm_mutex);
+ switch (cdev->state) {
+ case MIC_BOOTING:
+ case MIC_ONLINE:
+ case MIC_SHUTTING_DOWN:
+ break;
+ default:
+ goto unlock;
+ }
+
+ /*
+ * Card kernel will never be relocated and any kernel text/data mapping
+ * can be translated to phys address by subtracting __START_KERNEL_map.
+ */
+ aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map;
+ log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+ aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map;
+ log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+
+ size = ioread32(log_buf_len_va);
+ kva = kmalloc(size, GFP_KERNEL);
+ if (!kva)
+ goto unlock;
+
+ memcpy_fromio(kva, log_buf_va, size);
+ seq_write(s, kva, size);
+ kfree(kva);
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+done:
+ return 0;
+}
+
+static int cosm_log_buf_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cosm_log_buf_show, inode->i_private);
+}
+
+static const struct file_operations log_buf_ops = {
+ .owner = THIS_MODULE,
+ .open = cosm_log_buf_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release
+};
+
+/**
+ * cosm_force_reset_show - Force MIC reset
+ *
+ * Invokes the force_reset COSM bus op instead of the standard reset
+ * op in case a force reset of the MIC device is required
+ */
+static int cosm_force_reset_show(struct seq_file *s, void *pos)
+{
+ struct cosm_device *cdev = s->private;
+
+ cosm_stop(cdev, true);
+ return 0;
+}
+
+static int cosm_force_reset_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cosm_force_reset_show, inode->i_private);
+}
+
+static const struct file_operations force_reset_ops = {
+ .owner = THIS_MODULE,
+ .open = cosm_force_reset_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release
+};
+
+void cosm_create_debug_dir(struct cosm_device *cdev)
+{
+ char name[16];
+
+ if (!cosm_dbg)
+ return;
+
+ scnprintf(name, sizeof(name), "mic%d", cdev->index);
+ cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg);
+ if (!cdev->dbg_dir)
+ return;
+
+ debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops);
+ debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev,
+ &force_reset_ops);
+}
+
+void cosm_delete_debug_dir(struct cosm_device *cdev)
+{
+ if (!cdev->dbg_dir)
+ return;
+
+ debugfs_remove_recursive(cdev->dbg_dir);
+}
+
+void cosm_init_debugfs(void)
+{
+ cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!cosm_dbg)
+ pr_err("can't create debugfs dir\n");
+}
+
+void cosm_exit_debugfs(void)
+{
+ debugfs_remove(cosm_dbg);
+}
diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c
new file mode 100644
index 000000000000..4b4b356c797d
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.c
@@ -0,0 +1,388 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include "cosm_main.h"
+
+static const char cosm_driver_name[] = "mic";
+
+/* COSM ID allocator */
+static struct ida g_cosm_ida;
+/* Class of MIC devices for sysfs accessibility. */
+static struct class *g_cosm_class;
+/* Number of MIC devices */
+static atomic_t g_num_dev;
+
+/**
+ * cosm_hw_reset - Issue a HW reset for the MIC device
+ * @cdev: pointer to cosm_device instance
+ */
+static void cosm_hw_reset(struct cosm_device *cdev, bool force)
+{
+ int i;
+
+#define MIC_RESET_TO (45)
+ if (force && cdev->hw_ops->force_reset)
+ cdev->hw_ops->force_reset(cdev);
+ else
+ cdev->hw_ops->reset(cdev);
+
+ for (i = 0; i < MIC_RESET_TO; i++) {
+ if (cdev->hw_ops->ready(cdev)) {
+ cosm_set_state(cdev, MIC_READY);
+ return;
+ }
+ /*
+ * Resets typically take 10s of seconds to complete.
+ * Since an MMIO read is required to check if the
+ * firmware is ready or not, a 1 second delay works nicely.
+ */
+ msleep(1000);
+ }
+ cosm_set_state(cdev, MIC_RESET_FAILED);
+}
+
+/**
+ * cosm_start - Start the MIC
+ * @cdev: pointer to cosm_device instance
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
+ */
+int cosm_start(struct cosm_device *cdev)
+{
+ const struct cred *orig_cred;
+ struct cred *override_cred;
+ int rc;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (!cdev->bootmode) {
+ dev_err(&cdev->dev, "%s %d bootmode not set\n",
+ __func__, __LINE__);
+ rc = -EINVAL;
+ goto unlock_ret;
+ }
+retry:
+ if (cdev->state != MIC_READY) {
+ dev_err(&cdev->dev, "%s %d MIC state not READY\n",
+ __func__, __LINE__);
+ rc = -EINVAL;
+ goto unlock_ret;
+ }
+ if (!cdev->hw_ops->ready(cdev)) {
+ cosm_hw_reset(cdev, false);
+ /*
+ * The state will either be MIC_READY if the reset succeeded
+ * or MIC_RESET_FAILED if the firmware reset failed.
+ */
+ goto retry;
+ }
+
+ /*
+ * Set credentials to root to allow non-root user to download initramsfs
+ * with 600 permissions
+ */
+ override_cred = prepare_creds();
+ if (!override_cred) {
+ dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
+ __func__, __LINE__);
+ rc = -ENOMEM;
+ goto unlock_ret;
+ }
+ override_cred->fsuid = GLOBAL_ROOT_UID;
+ orig_cred = override_creds(override_cred);
+
+ rc = cdev->hw_ops->start(cdev, cdev->index);
+
+ revert_creds(orig_cred);
+ put_cred(override_cred);
+ if (rc)
+ goto unlock_ret;
+
+ /*
+ * If linux is being booted, card is treated 'online' only
+ * when the scif interface in the card is up. If anything else
+ * is booted, we set card to 'online' immediately.
+ */
+ if (!strcmp(cdev->bootmode, "linux"))
+ cosm_set_state(cdev, MIC_BOOTING);
+ else
+ cosm_set_state(cdev, MIC_ONLINE);
+unlock_ret:
+ mutex_unlock(&cdev->cosm_mutex);
+ if (rc)
+ dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
+ return rc;
+}
+
+/**
+ * cosm_stop - Prepare the MIC for reset and trigger reset
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already reset and ready.
+ *
+ * RETURNS: None
+ */
+void cosm_stop(struct cosm_device *cdev, bool force)
+{
+ mutex_lock(&cdev->cosm_mutex);
+ if (cdev->state != MIC_READY || force) {
+ /*
+ * Don't call hw_ops if they have been called previously.
+ * stop(..) calls device_unregister and will crash the system if
+ * called multiple times.
+ */
+ bool call_hw_ops = cdev->state != MIC_RESET_FAILED &&
+ cdev->state != MIC_READY;
+
+ if (cdev->state != MIC_RESETTING)
+ cosm_set_state(cdev, MIC_RESETTING);
+ cdev->heartbeat_watchdog_enable = false;
+ if (call_hw_ops)
+ cdev->hw_ops->stop(cdev, force);
+ cosm_hw_reset(cdev, force);
+ cosm_set_shutdown_status(cdev, MIC_NOP);
+ if (call_hw_ops && cdev->hw_ops->post_reset)
+ cdev->hw_ops->post_reset(cdev, cdev->state);
+ }
+ mutex_unlock(&cdev->cosm_mutex);
+ flush_work(&cdev->scif_work);
+}
+
+/**
+ * cosm_reset_trigger_work - Trigger MIC reset
+ * @work: The work structure
+ *
+ * This work is scheduled whenever the host wants to reset the MIC.
+ */
+static void cosm_reset_trigger_work(struct work_struct *work)
+{
+ struct cosm_device *cdev = container_of(work, struct cosm_device,
+ reset_trigger_work);
+ cosm_stop(cdev, false);
+}
+
+/**
+ * cosm_reset - Schedule MIC reset
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: An -EINVAL if the card is already READY or 0 for success.
+ */
+int cosm_reset(struct cosm_device *cdev)
+{
+ int rc = 0;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (cdev->state != MIC_READY) {
+ cosm_set_state(cdev, MIC_RESETTING);
+ schedule_work(&cdev->reset_trigger_work);
+ } else {
+ dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
+ rc = -EINVAL;
+ }
+ mutex_unlock(&cdev->cosm_mutex);
+ return rc;
+}
+
+/**
+ * cosm_shutdown - Initiate MIC shutdown.
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: None
+ */
+int cosm_shutdown(struct cosm_device *cdev)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
+ int rc = 0;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (cdev->state != MIC_ONLINE) {
+ rc = -EINVAL;
+ dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
+ __func__, __LINE__, cosm_state_string[cdev->state]);
+ goto err;
+ }
+
+ if (!cdev->epd) {
+ rc = -ENOTCONN;
+ dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+
+ rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0) {
+ dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+ cdev->heartbeat_watchdog_enable = false;
+ cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+ rc = 0;
+err:
+ mutex_unlock(&cdev->cosm_mutex);
+ return rc;
+}
+
+static int cosm_driver_probe(struct cosm_device *cdev)
+{
+ int rc;
+
+ /* Initialize SCIF server at first probe */
+ if (atomic_add_return(1, &g_num_dev) == 1) {
+ rc = cosm_scif_init();
+ if (rc)
+ goto scif_exit;
+ }
+ mutex_init(&cdev->cosm_mutex);
+ INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
+ INIT_WORK(&cdev->scif_work, cosm_scif_work);
+ cdev->sysfs_heartbeat_enable = true;
+ cosm_sysfs_init(cdev);
+ cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
+ MKDEV(0, cdev->index), cdev, cdev->attr_group,
+ "mic%d", cdev->index);
+ if (IS_ERR(cdev->sdev)) {
+ rc = PTR_ERR(cdev->sdev);
+ dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
+ rc);
+ goto scif_exit;
+ }
+
+ cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
+ "state");
+ if (!cdev->state_sysfs) {
+ rc = -ENODEV;
+ dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
+ goto destroy_device;
+ }
+ cosm_create_debug_dir(cdev);
+ return 0;
+destroy_device:
+ device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+scif_exit:
+ if (atomic_dec_and_test(&g_num_dev))
+ cosm_scif_exit();
+ return rc;
+}
+
+static void cosm_driver_remove(struct cosm_device *cdev)
+{
+ cosm_delete_debug_dir(cdev);
+ sysfs_put(cdev->state_sysfs);
+ device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+ flush_work(&cdev->reset_trigger_work);
+ cosm_stop(cdev, false);
+ if (atomic_dec_and_test(&g_num_dev))
+ cosm_scif_exit();
+
+ /* These sysfs entries might have allocated */
+ kfree(cdev->cmdline);
+ kfree(cdev->firmware);
+ kfree(cdev->ramdisk);
+ kfree(cdev->bootmode);
+}
+
+static int cosm_suspend(struct device *dev)
+{
+ struct cosm_device *cdev = dev_to_cosm(dev);
+
+ mutex_lock(&cdev->cosm_mutex);
+ switch (cdev->state) {
+ /**
+ * Suspend/freeze hooks in userspace have already shutdown the card.
+ * Card should be 'ready' in most cases. It is however possible that
+ * some userspace application initiated a boot. In those cases, we
+ * simply reset the card.
+ */
+ case MIC_ONLINE:
+ case MIC_BOOTING:
+ case MIC_SHUTTING_DOWN:
+ mutex_unlock(&cdev->cosm_mutex);
+ cosm_stop(cdev, false);
+ break;
+ default:
+ mutex_unlock(&cdev->cosm_mutex);
+ break;
+ }
+ return 0;
+}
+
+static const struct dev_pm_ops cosm_pm_ops = {
+ .suspend = cosm_suspend,
+ .freeze = cosm_suspend
+};
+
+static struct cosm_driver cosm_driver = {
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .owner = THIS_MODULE,
+ .pm = &cosm_pm_ops,
+ },
+ .probe = cosm_driver_probe,
+ .remove = cosm_driver_remove
+};
+
+static int __init cosm_init(void)
+{
+ int ret;
+
+ cosm_init_debugfs();
+
+ g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
+ if (IS_ERR(g_cosm_class)) {
+ ret = PTR_ERR(g_cosm_class);
+ pr_err("class_create failed ret %d\n", ret);
+ goto cleanup_debugfs;
+ }
+
+ ida_init(&g_cosm_ida);
+ ret = cosm_register_driver(&cosm_driver);
+ if (ret) {
+ pr_err("cosm_register_driver failed ret %d\n", ret);
+ goto ida_destroy;
+ }
+ return 0;
+ida_destroy:
+ ida_destroy(&g_cosm_ida);
+ class_destroy(g_cosm_class);
+cleanup_debugfs:
+ cosm_exit_debugfs();
+ return ret;
+}
+
+static void __exit cosm_exit(void)
+{
+ cosm_unregister_driver(&cosm_driver);
+ ida_destroy(&g_cosm_ida);
+ class_destroy(g_cosm_class);
+ cosm_exit_debugfs();
+}
+
+module_init(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h
new file mode 100644
index 000000000000..f01156fca881
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.h
@@ -0,0 +1,70 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#ifndef _COSM_COSM_H_
+#define _COSM_COSM_H_
+
+#include <linux/scif.h>
+#include "../bus/cosm_bus.h"
+
+#define COSM_HEARTBEAT_SEND_SEC 30
+#define SCIF_COSM_LISTEN_PORT 201
+
+/**
+ * enum COSM msg id's
+ * @COSM_MSG_SHUTDOWN: host->card trigger shutdown
+ * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time
+ * @COSM_MSG_HEARTBEAT: card->host heartbeat
+ * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload
+ */
+enum cosm_msg_id {
+ COSM_MSG_SHUTDOWN,
+ COSM_MSG_SYNC_TIME,
+ COSM_MSG_HEARTBEAT,
+ COSM_MSG_SHUTDOWN_STATUS,
+};
+
+struct cosm_msg {
+ u64 id;
+ union {
+ u64 shutdown_status;
+ struct timespec64 timespec;
+ };
+};
+
+extern const char * const cosm_state_string[];
+extern const char * const cosm_shutdown_status_string[];
+
+void cosm_sysfs_init(struct cosm_device *cdev);
+int cosm_start(struct cosm_device *cdev);
+void cosm_stop(struct cosm_device *cdev, bool force);
+int cosm_reset(struct cosm_device *cdev);
+int cosm_shutdown(struct cosm_device *cdev);
+void cosm_set_state(struct cosm_device *cdev, u8 state);
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status);
+void cosm_init_debugfs(void);
+void cosm_exit_debugfs(void);
+void cosm_create_debug_dir(struct cosm_device *cdev);
+void cosm_delete_debug_dir(struct cosm_device *cdev);
+int cosm_scif_init(void);
+void cosm_scif_exit(void);
+void cosm_scif_work(struct work_struct *work);
+
+#endif
diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c
new file mode 100644
index 000000000000..5696df4326b5
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_scif_server.c
@@ -0,0 +1,405 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/kthread.h>
+#include "cosm_main.h"
+
+/*
+ * The COSM driver uses SCIF to communicate between the management node and the
+ * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
+ * receive a shutdown status back from the card upon completion of shutdown and
+ * (c) receive periodic heartbeat messages from the card used to deduce if the
+ * card has crashed.
+ *
+ * A COSM server consisting of a SCIF listening endpoint waits for incoming
+ * connections from the card. Upon acceptance of the connection, a separate
+ * work-item is scheduled to handle SCIF message processing for that card. The
+ * life-time of this work-item is therefore the time from which the connection
+ * from a card is accepted to the time at which the connection is closed. A new
+ * work-item starts each time the card boots and is alive till the card (a)
+ * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is
+ * unloaded.
+ *
+ * From the point of view of COSM interactions with SCIF during card
+ * shutdown, reset and crash are as follows:
+ *
+ * Card shutdown
+ * -------------
+ * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
+ * message from the host.
+ * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
+ * in scif_remove(..) getting called on the card
+ * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
+ * scif_peer_unregister_device -> device_unregister for the host peer device
+ * 4. During device_unregister remove(..) method of cosm_client is invoked which
+ * closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
+ * message being sent to host SCIF. SCIF_DISCNCT message processing on the
+ * host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
+ * up the host COSM thread blocked in scif_poll(..) resulting in
+ * scif_poll(..) returning POLLHUP.
+ * 5. On the card, scif_peer_release_dev is next called which results in an
+ * SCIF_EXIT message being sent to the host and after receiving the
+ * SCIF_EXIT_ACK from the host the peer device teardown on the card is
+ * complete.
+ * 6. As part of the SCIF_EXIT message processing on the host, host sends a
+ * SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
+ * starts a similar SCIF peer device teardown sequence on the host
+ * corresponding to the card being shut down.
+ *
+ * Card reset
+ * ----------
+ * The case of interest here is when the card has not been previously shut down
+ * since most of the steps below are skipped in that case:
+
+ * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
+ * which unregisters the SCIF HW device resulting in scif_remove(..) being
+ * called on the host.
+ * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
+ * SCIF_EXIT message being sent to the card.
+ * 3. The card executes scif_stop() as part of SCIF_EXIT message
+ * processing. This results in the COSM endpoint on the card being closed and
+ * the SCIF host peer device on the card getting unregistered similar to
+ * steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
+ * host returns POLLHUP as a result.
+ * 4. On the host, card peer device unregister and SCIF HW remove(..) also
+ * subsequently complete.
+ *
+ * Card crash
+ * ----------
+ * If a reset is issued after the card has crashed, there is no SCIF_DISCNT
+ * message from the card which would result in scif_poll(..) returning
+ * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
+ * message to itself resulting in the card SCIF peer device being unregistered,
+ * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
+ * scif_invalidate_ep call sequence which sets the endpoint state to
+ * DISCONNECTED and results in scif_poll(..) returning POLLHUP.
+ */
+
+#define COSM_SCIF_BACKLOG 16
+#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10
+#define COSM_HEARTBEAT_TIMEOUT_SEC \
+ (COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)
+#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)
+
+static struct task_struct *server_thread;
+static scif_epd_t listen_epd;
+
+/* Publish MIC card's shutdown status to user space MIC daemon */
+static void cosm_update_mic_status(struct cosm_device *cdev)
+{
+ if (cdev->shutdown_status_int != MIC_NOP) {
+ cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
+ cdev->shutdown_status_int = MIC_NOP;
+ }
+}
+
+/* Store MIC card's shutdown status internally when it is received */
+static void cosm_shutdown_status_int(struct cosm_device *cdev,
+ enum mic_status shutdown_status)
+{
+ switch (shutdown_status) {
+ case MIC_HALTED:
+ case MIC_POWER_OFF:
+ case MIC_RESTART:
+ case MIC_CRASHED:
+ break;
+ default:
+ dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n",
+ __func__, __LINE__, shutdown_status);
+ return;
+ };
+ cdev->shutdown_status_int = shutdown_status;
+ cdev->heartbeat_watchdog_enable = false;
+
+ if (cdev->state != MIC_SHUTTING_DOWN)
+ cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+}
+
+/* Non-blocking recv. Read and process all available messages */
+static void cosm_scif_recv(struct cosm_device *cdev)
+{
+ struct cosm_msg msg;
+ int rc;
+
+ while (1) {
+ rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0);
+ if (!rc) {
+ break;
+ } else if (rc < 0) {
+ dev_dbg(&cdev->dev, "%s: %d rc %d\n",
+ __func__, __LINE__, rc);
+ break;
+ }
+ dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n",
+ __func__, __LINE__, rc, msg.id);
+
+ switch (msg.id) {
+ case COSM_MSG_SHUTDOWN_STATUS:
+ cosm_shutdown_status_int(cdev, msg.shutdown_status);
+ break;
+ case COSM_MSG_HEARTBEAT:
+ /* Nothing to do, heartbeat only unblocks scif_poll */
+ break;
+ default:
+ dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n",
+ __func__, __LINE__, msg.id);
+ break;
+ }
+ }
+}
+
+/* Publish crashed status for this MIC card */
+static void cosm_set_crashed(struct cosm_device *cdev)
+{
+ dev_err(&cdev->dev, "node alive timeout\n");
+ cosm_shutdown_status_int(cdev, MIC_CRASHED);
+ cosm_update_mic_status(cdev);
+}
+
+/* Send host time to the MIC card to sync system time between host and MIC */
+static void cosm_send_time(struct cosm_device *cdev)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
+ int rc;
+
+ getnstimeofday64(&msg.timespec);
+ rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0)
+ dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+ __func__, __LINE__, rc);
+}
+
+/*
+ * Close this cosm_device's endpoint after its peer endpoint on the card has
+ * been closed. In all cases except MIC card crash POLLHUP on the host is
+ * triggered by the client's endpoint being closed.
+ */
+static void cosm_scif_close(struct cosm_device *cdev)
+{
+ /*
+ * Because SHUTDOWN_STATUS message is sent by the MIC cards in the
+ * reboot notifier when shutdown is still not complete, we notify mpssd
+ * to reset the card when SCIF endpoint is closed.
+ */
+ cosm_update_mic_status(cdev);
+ scif_close(cdev->epd);
+ cdev->epd = NULL;
+ dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+}
+
+/*
+ * Set card state to ONLINE when a new SCIF connection from a MIC card is
+ * received. Normally the state is BOOTING when the connection comes in, but can
+ * be ONLINE if cosm_client driver on the card was unloaded and then reloaded.
+ */
+static int cosm_set_online(struct cosm_device *cdev)
+{
+ int rc = 0;
+
+ if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) {
+ cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable;
+ cdev->epd = cdev->newepd;
+ if (cdev->state == MIC_BOOTING)
+ cosm_set_state(cdev, MIC_ONLINE);
+ cosm_send_time(cdev);
+ dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+ } else {
+ dev_warn(&cdev->dev, "%s %d not going online in state: %s\n",
+ __func__, __LINE__, cosm_state_string[cdev->state]);
+ rc = -EINVAL;
+ }
+ /* Drop reference acquired by bus_find_device in the server thread */
+ put_device(&cdev->dev);
+ return rc;
+}
+
+/*
+ * Work function for handling work for a SCIF connection from a particular MIC
+ * card. It first sets the card state to ONLINE and then calls scif_poll to
+ * block on activity such as incoming messages on the SCIF endpoint. When the
+ * endpoint is closed, the work function exits, completing its life cycle, from
+ * MIC card boot to card shutdown/reset/crash.
+ */
+void cosm_scif_work(struct work_struct *work)
+{
+ struct cosm_device *cdev = container_of(work, struct cosm_device,
+ scif_work);
+ struct scif_pollepd pollepd;
+ int rc;
+
+ mutex_lock(&cdev->cosm_mutex);
+ if (cosm_set_online(cdev))
+ goto exit;
+
+ while (1) {
+ pollepd.epd = cdev->epd;
+ pollepd.events = POLLIN;
+
+ /* Drop the mutex before blocking in scif_poll(..) */
+ mutex_unlock(&cdev->cosm_mutex);
+ /* poll(..) with timeout on our endpoint */
+ rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC);
+ mutex_lock(&cdev->cosm_mutex);
+ if (rc < 0) {
+ dev_err(&cdev->dev, "%s %d scif_poll rc %d\n",
+ __func__, __LINE__, rc);
+ continue;
+ }
+
+ /* There is a message from the card */
+ if (pollepd.revents & POLLIN)
+ cosm_scif_recv(cdev);
+
+ /* The peer endpoint is closed or this endpoint disconnected */
+ if (pollepd.revents & POLLHUP) {
+ cosm_scif_close(cdev);
+ break;
+ }
+
+ /* Did we timeout from poll? */
+ if (!rc && cdev->heartbeat_watchdog_enable)
+ cosm_set_crashed(cdev);
+ }
+exit:
+ dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__);
+ mutex_unlock(&cdev->cosm_mutex);
+}
+
+/*
+ * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC
+ * cards, finds the correct cosm_device to associate that connection with and
+ * schedules individual work items for each MIC card.
+ */
+static int cosm_scif_server(void *unused)
+{
+ struct cosm_device *cdev;
+ scif_epd_t newepd;
+ struct scif_port_id port_id;
+ int rc;
+
+ allow_signal(SIGKILL);
+
+ while (!kthread_should_stop()) {
+ rc = scif_accept(listen_epd, &port_id, &newepd,
+ SCIF_ACCEPT_SYNC);
+ if (rc < 0) {
+ if (-ERESTARTSYS != rc)
+ pr_err("%s %d rc %d\n", __func__, __LINE__, rc);
+ continue;
+ }
+
+ /*
+ * Associate the incoming connection with a particular
+ * cosm_device, COSM device ID == SCIF node ID - 1
+ */
+ cdev = cosm_find_cdev_by_id(port_id.node - 1);
+ if (!cdev)
+ continue;
+ cdev->newepd = newepd;
+ schedule_work(&cdev->scif_work);
+ }
+
+ pr_debug("%s %d Server thread stopped\n", __func__, __LINE__);
+ return 0;
+}
+
+static int cosm_scif_listen(void)
+{
+ int rc;
+
+ listen_epd = scif_open();
+ if (!listen_epd) {
+ pr_err("%s %d scif_open failed\n", __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT);
+ if (rc < 0) {
+ pr_err("%s %d scif_bind failed rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+
+ rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG);
+ if (rc < 0) {
+ pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc);
+ goto err;
+ }
+ pr_debug("%s %d listen_epd set up\n", __func__, __LINE__);
+ return 0;
+err:
+ scif_close(listen_epd);
+ listen_epd = NULL;
+ return rc;
+}
+
+static void cosm_scif_listen_exit(void)
+{
+ pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);
+ if (listen_epd) {
+ scif_close(listen_epd);
+ listen_epd = NULL;
+ }
+}
+
+/*
+ * Create a listening SCIF endpoint and a server kthread which accepts incoming
+ * SCIF connections from MIC cards
+ */
+int cosm_scif_init(void)
+{
+ int rc = cosm_scif_listen();
+
+ if (rc) {
+ pr_err("%s %d cosm_scif_listen rc %d\n",
+ __func__, __LINE__, rc);
+ goto err;
+ }
+
+ server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
+ if (IS_ERR(server_thread)) {
+ rc = PTR_ERR(server_thread);
+ pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);
+ goto listen_exit;
+ }
+ return 0;
+listen_exit:
+ cosm_scif_listen_exit();
+err:
+ return rc;
+}
+
+/* Stop the running server thread and close the listening SCIF endpoint */
+void cosm_scif_exit(void)
+{
+ int rc;
+
+ if (!IS_ERR_OR_NULL(server_thread)) {
+ rc = send_sig(SIGKILL, server_thread, 0);
+ if (rc) {
+ pr_err("%s %d send_sig rc %d\n",
+ __func__, __LINE__, rc);
+ return;
+ }
+ kthread_stop(server_thread);
+ }
+
+ cosm_scif_listen_exit();
+}
diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c
new file mode 100644
index 000000000000..29d6863b6e59
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_sysfs.c
@@ -0,0 +1,461 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/slab.h>
+#include "cosm_main.h"
+
+/*
+ * A state-to-string lookup table, for exposing a human readable state
+ * via sysfs. Always keep in sync with enum cosm_states
+ */
+const char * const cosm_state_string[] = {
+ [MIC_READY] = "ready",
+ [MIC_BOOTING] = "booting",
+ [MIC_ONLINE] = "online",
+ [MIC_SHUTTING_DOWN] = "shutting_down",
+ [MIC_RESETTING] = "resetting",
+ [MIC_RESET_FAILED] = "reset_failed",
+};
+
+/*
+ * A shutdown-status-to-string lookup table, for exposing a human
+ * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status
+ */
+const char * const cosm_shutdown_status_string[] = {
+ [MIC_NOP] = "nop",
+ [MIC_CRASHED] = "crashed",
+ [MIC_HALTED] = "halted",
+ [MIC_POWER_OFF] = "poweroff",
+ [MIC_RESTART] = "restart",
+};
+
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status)
+{
+ dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n",
+ cosm_shutdown_status_string[cdev->shutdown_status],
+ cosm_shutdown_status_string[shutdown_status]);
+ cdev->shutdown_status = shutdown_status;
+}
+
+void cosm_set_state(struct cosm_device *cdev, u8 state)
+{
+ dev_dbg(&cdev->dev, "State %s -> %s\n",
+ cosm_state_string[cdev->state],
+ cosm_state_string[state]);
+ cdev->state = state;
+ sysfs_notify_dirent(cdev->state_sysfs);
+}
+
+static ssize_t
+family_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return cdev->hw_ops->family(cdev, buf);
+}
+static DEVICE_ATTR_RO(family);
+
+static ssize_t
+stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return cdev->hw_ops->stepping(cdev, buf);
+}
+static DEVICE_ATTR_RO(stepping);
+
+static ssize_t
+state_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev || cdev->state >= MIC_LAST)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ cosm_state_string[cdev->state]);
+}
+
+static ssize_t
+state_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int rc;
+
+ if (!cdev)
+ return -EINVAL;
+
+ if (sysfs_streq(buf, "boot")) {
+ rc = cosm_start(cdev);
+ goto done;
+ }
+ if (sysfs_streq(buf, "reset")) {
+ rc = cosm_reset(cdev);
+ goto done;
+ }
+
+ if (sysfs_streq(buf, "shutdown")) {
+ rc = cosm_shutdown(cdev);
+ goto done;
+ }
+ rc = -EINVAL;
+done:
+ if (rc)
+ count = rc;
+ return count;
+}
+static DEVICE_ATTR_RW(state);
+
+static ssize_t shutdown_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ cosm_shutdown_status_string[cdev->shutdown_status]);
+}
+static DEVICE_ATTR_RO(shutdown_status);
+
+static ssize_t
+heartbeat_enable_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable);
+}
+
+static ssize_t
+heartbeat_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int enable;
+ int ret;
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ ret = kstrtoint(buf, 10, &enable);
+ if (ret)
+ goto unlock;
+
+ cdev->sysfs_heartbeat_enable = enable;
+ /* if state is not online, cdev->heartbeat_watchdog_enable is 0 */
+ if (cdev->state == MIC_ONLINE)
+ cdev->heartbeat_watchdog_enable = enable;
+ ret = count;
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return ret;
+}
+static DEVICE_ATTR_RW(heartbeat_enable);
+
+static ssize_t
+cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *cmdline;
+
+ if (!cdev)
+ return -EINVAL;
+
+ cmdline = cdev->cmdline;
+
+ if (cmdline)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
+ return 0;
+}
+
+static ssize_t
+cmdline_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->cmdline);
+
+ cdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->cmdline) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+
+ strncpy(cdev->cmdline, buf, count);
+
+ if (cdev->cmdline[count - 1] == '\n')
+ cdev->cmdline[count - 1] = '\0';
+ else
+ cdev->cmdline[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(cmdline);
+
+static ssize_t
+firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *firmware;
+
+ if (!cdev)
+ return -EINVAL;
+
+ firmware = cdev->firmware;
+
+ if (firmware)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
+ return 0;
+}
+
+static ssize_t
+firmware_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->firmware);
+
+ cdev->firmware = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->firmware) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+ strncpy(cdev->firmware, buf, count);
+
+ if (cdev->firmware[count - 1] == '\n')
+ cdev->firmware[count - 1] = '\0';
+ else
+ cdev->firmware[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(firmware);
+
+static ssize_t
+ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *ramdisk;
+
+ if (!cdev)
+ return -EINVAL;
+
+ ramdisk = cdev->ramdisk;
+
+ if (ramdisk)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
+ return 0;
+}
+
+static ssize_t
+ramdisk_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->ramdisk);
+
+ cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->ramdisk) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+
+ strncpy(cdev->ramdisk, buf, count);
+
+ if (cdev->ramdisk[count - 1] == '\n')
+ cdev->ramdisk[count - 1] = '\0';
+ else
+ cdev->ramdisk[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(ramdisk);
+
+static ssize_t
+bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ char *bootmode;
+
+ if (!cdev)
+ return -EINVAL;
+
+ bootmode = cdev->bootmode;
+
+ if (bootmode)
+ return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
+ return 0;
+}
+
+static ssize_t
+bootmode_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash"))
+ return -EINVAL;
+
+ mutex_lock(&cdev->cosm_mutex);
+ kfree(cdev->bootmode);
+
+ cdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
+ if (!cdev->bootmode) {
+ count = -ENOMEM;
+ goto unlock;
+ }
+
+ strncpy(cdev->bootmode, buf, count);
+
+ if (cdev->bootmode[count - 1] == '\n')
+ cdev->bootmode[count - 1] = '\0';
+ else
+ cdev->bootmode[count] = '\0';
+unlock:
+ mutex_unlock(&cdev->cosm_mutex);
+ return count;
+}
+static DEVICE_ATTR_RW(bootmode);
+
+static ssize_t
+log_buf_addr_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr);
+}
+
+static ssize_t
+log_buf_addr_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int ret;
+ unsigned long addr;
+
+ if (!cdev)
+ return -EINVAL;
+
+ ret = kstrtoul(buf, 16, &addr);
+ if (ret)
+ goto exit;
+
+ cdev->log_buf_addr = (void *)addr;
+ ret = count;
+exit:
+ return ret;
+}
+static DEVICE_ATTR_RW(log_buf_addr);
+
+static ssize_t
+log_buf_len_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+
+ if (!cdev)
+ return -EINVAL;
+
+ return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len);
+}
+
+static ssize_t
+log_buf_len_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cosm_device *cdev = dev_get_drvdata(dev);
+ int ret;
+ unsigned long addr;
+
+ if (!cdev)
+ return -EINVAL;
+
+ ret = kstrtoul(buf, 16, &addr);
+ if (ret)
+ goto exit;
+
+ cdev->log_buf_len = (int *)addr;
+ ret = count;
+exit:
+ return ret;
+}
+static DEVICE_ATTR_RW(log_buf_len);
+
+static struct attribute *cosm_default_attrs[] = {
+ &dev_attr_family.attr,
+ &dev_attr_stepping.attr,
+ &dev_attr_state.attr,
+ &dev_attr_shutdown_status.attr,
+ &dev_attr_heartbeat_enable.attr,
+ &dev_attr_cmdline.attr,
+ &dev_attr_firmware.attr,
+ &dev_attr_ramdisk.attr,
+ &dev_attr_bootmode.attr,
+ &dev_attr_log_buf_addr.attr,
+ &dev_attr_log_buf_len.attr,
+
+ NULL
+};
+
+ATTRIBUTE_GROUPS(cosm_default);
+
+void cosm_sysfs_init(struct cosm_device *cdev)
+{
+ cdev->attr_group = cosm_default_groups;
+}
diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile
new file mode 100644
index 000000000000..6f751a519a09
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile - Intel MIC COSM Client Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += cosm_client.o
+
+cosm_client-objs += cosm_scif_client.o
diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c
new file mode 100644
index 000000000000..03e98bf1ac15
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c
@@ -0,0 +1,275 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Client Driver
+ *
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kthread.h>
+#include "../cosm/cosm_main.h"
+
+#define COSM_SCIF_MAX_RETRIES 10
+#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC)
+
+static struct task_struct *client_thread;
+static scif_epd_t client_epd;
+static struct scif_peer_dev *client_spdev;
+
+/*
+ * Reboot notifier: receives shutdown status from the OS and communicates it
+ * back to the COSM process on the host
+ */
+static int cosm_reboot_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS };
+ int rc;
+
+ event = (event == SYS_RESTART) ? SYSTEM_RESTART : event;
+ dev_info(&client_spdev->dev, "%s %d received event %ld\n",
+ __func__, __LINE__, event);
+
+ msg.shutdown_status = event;
+ rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0)
+ dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+ __func__, __LINE__, rc);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block cosm_reboot = {
+ .notifier_call = cosm_reboot_event,
+};
+
+/* Set system time from timespec value received from the host */
+static void cosm_set_time(struct cosm_msg *msg)
+{
+ int rc = do_settimeofday64(&msg->timespec);
+
+ if (rc)
+ dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n",
+ __func__, __LINE__, rc);
+}
+
+/* COSM client receive message processing */
+static void cosm_client_recv(void)
+{
+ struct cosm_msg msg;
+ int rc;
+
+ while (1) {
+ rc = scif_recv(client_epd, &msg, sizeof(msg), 0);
+ if (!rc) {
+ return;
+ } else if (rc < 0) {
+ dev_err(&client_spdev->dev, "%s: %d rc %d\n",
+ __func__, __LINE__, rc);
+ return;
+ }
+
+ dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n",
+ __func__, __LINE__, rc, msg.id);
+
+ switch (msg.id) {
+ case COSM_MSG_SYNC_TIME:
+ cosm_set_time(&msg);
+ break;
+ case COSM_MSG_SHUTDOWN:
+ orderly_poweroff(true);
+ break;
+ default:
+ dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n",
+ __func__, __LINE__, msg.id);
+ break;
+ }
+ }
+}
+
+/* Initiate connection to the COSM server on the host */
+static int cosm_scif_connect(void)
+{
+ struct scif_port_id port_id;
+ int i, rc;
+
+ client_epd = scif_open();
+ if (!client_epd) {
+ dev_err(&client_spdev->dev, "%s %d scif_open failed\n",
+ __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ port_id.node = 0;
+ port_id.port = SCIF_COSM_LISTEN_PORT;
+
+ for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) {
+ rc = scif_connect(client_epd, &port_id);
+ if (rc < 0)
+ msleep(1000);
+ else
+ break;
+ }
+
+ if (rc < 0) {
+ dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n",
+ __func__, __LINE__, rc);
+ scif_close(client_epd);
+ client_epd = NULL;
+ }
+ return rc < 0 ? rc : 0;
+}
+
+/* Close host SCIF connection */
+static void cosm_scif_connect_exit(void)
+{
+ if (client_epd) {
+ scif_close(client_epd);
+ client_epd = NULL;
+ }
+}
+
+/*
+ * COSM SCIF client thread function: waits for messages from the host and sends
+ * a heartbeat to the host
+ */
+static int cosm_scif_client(void *unused)
+{
+ struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
+ struct scif_pollepd pollepd;
+ int rc;
+
+ allow_signal(SIGKILL);
+
+ while (!kthread_should_stop()) {
+ pollepd.epd = client_epd;
+ pollepd.events = POLLIN;
+
+ rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
+ if (rc < 0) {
+ if (-EINTR != rc)
+ dev_err(&client_spdev->dev,
+ "%s %d scif_poll rc %d\n",
+ __func__, __LINE__, rc);
+ continue;
+ }
+
+ if (pollepd.revents & POLLIN)
+ cosm_client_recv();
+
+ msg.id = COSM_MSG_HEARTBEAT;
+ rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+ if (rc < 0)
+ dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+ __func__, __LINE__, rc);
+ }
+
+ dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n",
+ __func__, __LINE__);
+ return 0;
+}
+
+static void cosm_scif_probe(struct scif_peer_dev *spdev)
+{
+ int rc;
+
+ dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+ __func__, __LINE__, spdev->dnode);
+
+ /* We are only interested in the host with spdev->dnode == 0 */
+ if (spdev->dnode)
+ return;
+
+ client_spdev = spdev;
+ rc = cosm_scif_connect();
+ if (rc)
+ goto exit;
+
+ rc = register_reboot_notifier(&cosm_reboot);
+ if (rc) {
+ dev_err(&spdev->dev,
+ "reboot notifier registration failed rc %d\n", rc);
+ goto connect_exit;
+ }
+
+ client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client");
+ if (IS_ERR(client_thread)) {
+ rc = PTR_ERR(client_thread);
+ dev_err(&spdev->dev, "%s %d kthread_run rc %d\n",
+ __func__, __LINE__, rc);
+ goto unreg_reboot;
+ }
+ return;
+unreg_reboot:
+ unregister_reboot_notifier(&cosm_reboot);
+connect_exit:
+ cosm_scif_connect_exit();
+exit:
+ client_spdev = NULL;
+}
+
+static void cosm_scif_remove(struct scif_peer_dev *spdev)
+{
+ int rc;
+
+ dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+ __func__, __LINE__, spdev->dnode);
+
+ if (spdev->dnode)
+ return;
+
+ if (!IS_ERR_OR_NULL(client_thread)) {
+ rc = send_sig(SIGKILL, client_thread, 0);
+ if (rc) {
+ pr_err("%s %d send_sig rc %d\n",
+ __func__, __LINE__, rc);
+ return;
+ }
+ kthread_stop(client_thread);
+ }
+ unregister_reboot_notifier(&cosm_reboot);
+ cosm_scif_connect_exit();
+ client_spdev = NULL;
+}
+
+static struct scif_client scif_client_cosm = {
+ .name = KBUILD_MODNAME,
+ .probe = cosm_scif_probe,
+ .remove = cosm_scif_remove,
+};
+
+static int __init cosm_client_init(void)
+{
+ int rc = scif_client_register(&scif_client_cosm);
+
+ if (rc)
+ pr_err("scif_client_register failed rc %d\n", rc);
+ return rc;
+}
+
+static void __exit cosm_client_exit(void)
+{
+ scif_client_unregister(&scif_client_cosm);
+}
+
+module_init(cosm_client_init);
+module_exit(cosm_client_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
index c2197f999394..004d3db0f990 100644
--- a/drivers/misc/mic/host/Makefile
+++ b/drivers/misc/mic/host/Makefile
@@ -5,7 +5,6 @@
obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
mic_host-objs := mic_main.o
mic_host-objs += mic_x100.o
-mic_host-objs += mic_sysfs.o
mic_host-objs += mic_smpt.o
mic_host-objs += mic_intr.o
mic_host-objs += mic_boot.o
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
index e5f6a5e7bca1..7845564dff64 100644
--- a/drivers/misc/mic/host/mic_boot.c
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -22,9 +22,9 @@
#include <linux/firmware.h>
#include <linux/pci.h>
#include <linux/kmod.h>
-
#include <linux/mic_common.h>
#include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
#include "../common/mic_dev.h"
#include "mic_device.h"
#include "mic_smpt.h"
@@ -99,7 +99,7 @@ static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg,
int i, j, ret;
dma_addr_t da;
- ret = dma_map_sg(mdev->sdev->parent, sg, nents, dir);
+ ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir);
if (ret <= 0)
return 0;
@@ -115,7 +115,7 @@ err:
mic_unmap(mdev, sg_dma_address(s), s->length);
sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s));
}
- dma_unmap_sg(mdev->sdev->parent, sg, nents, dir);
+ dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
return 0;
}
@@ -135,7 +135,7 @@ static void __mic_dma_unmap_sg(struct device *dev,
mic_unmap(mdev, sg_dma_address(s), s->length);
sg_dma_address(s) = da;
}
- dma_unmap_sg(mdev->sdev->parent, sg, nents, dir);
+ dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
}
static struct dma_map_ops __mic_dma_ops = {
@@ -270,48 +270,13 @@ static struct mbus_hw_ops mbus_hw_ops = {
.ack_interrupt = _mic_ack_interrupt,
};
-/**
- * mic_reset - Reset the MIC device.
- * @mdev: pointer to mic_device instance
- */
-static void mic_reset(struct mic_device *mdev)
-{
- int i;
-
-#define MIC_RESET_TO (45)
-
- reinit_completion(&mdev->reset_wait);
- mdev->ops->reset_fw_ready(mdev);
- mdev->ops->reset(mdev);
-
- for (i = 0; i < MIC_RESET_TO; i++) {
- if (mdev->ops->is_fw_ready(mdev))
- goto done;
- /*
- * Resets typically take 10s of seconds to complete.
- * Since an MMIO read is required to check if the
- * firmware is ready or not, a 1 second delay works nicely.
- */
- msleep(1000);
- }
- mic_set_state(mdev, MIC_RESET_FAILED);
-done:
- complete_all(&mdev->reset_wait);
-}
-
/* Initialize the MIC bootparams */
void mic_bootparam_init(struct mic_device *mdev)
{
struct mic_bootparam *bootparam = mdev->dp;
bootparam->magic = cpu_to_le32(MIC_MAGIC);
- bootparam->c2h_shutdown_db = mdev->shutdown_db;
- bootparam->h2c_shutdown_db = -1;
bootparam->h2c_config_db = -1;
- bootparam->shutdown_status = 0;
- bootparam->shutdown_card = 0;
- /* Total nodes = number of MICs + 1 for self node */
- bootparam->tot_nodes = atomic_read(&g_num_mics) + 1;
bootparam->node_id = mdev->id + 1;
bootparam->scif_host_dma_addr = 0x0;
bootparam->scif_card_dma_addr = 0x0;
@@ -319,6 +284,26 @@ void mic_bootparam_init(struct mic_device *mdev)
bootparam->h2c_scif_db = -1;
}
+static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev)
+{
+ return dev_get_drvdata(cdev->dev.parent);
+}
+
+static void _mic_reset(struct cosm_device *cdev)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+ mdev->ops->reset_fw_ready(mdev);
+ mdev->ops->reset(mdev);
+}
+
+static bool _mic_ready(struct cosm_device *cdev)
+{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+
+ return mdev->ops->is_fw_ready(mdev);
+}
+
/**
* mic_request_dma_chans - Request DMA channels
* @mdev: pointer to mic_device instance
@@ -336,14 +321,14 @@ static int mic_request_dma_chans(struct mic_device *mdev)
do {
chan = dma_request_channel(mask, mdev->ops->dma_filter,
- mdev->sdev->parent);
+ &mdev->pdev->dev);
if (chan) {
mdev->dma_ch[mdev->num_dma_ch++] = chan;
if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN)
break;
}
} while (chan);
- dev_info(mdev->sdev->parent, "DMA channels # %d\n", mdev->num_dma_ch);
+ dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch);
return mdev->num_dma_ch;
}
@@ -365,34 +350,24 @@ static void mic_free_dma_chans(struct mic_device *mdev)
}
/**
- * mic_start - Start the MIC.
- * @mdev: pointer to mic_device instance
- * @buf: buffer containing boot string including firmware/ramdisk path.
+ * _mic_start - Start the MIC.
+ * @cdev: pointer to cosm_device instance
+ * @id: MIC device id/index provided by COSM used in other drivers like SCIF
*
* This function prepares an MIC for boot and initiates boot.
* RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ *
+ * For all cosm_hw_ops the caller holds a mutex to ensure serialization.
*/
-int mic_start(struct mic_device *mdev, const char *buf)
+static int _mic_start(struct cosm_device *cdev, int id)
{
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
int rc;
- mutex_lock(&mdev->mic_mutex);
+
mic_bootparam_init(mdev);
-retry:
- if (MIC_OFFLINE != mdev->state) {
- rc = -EINVAL;
- goto unlock_ret;
- }
- if (!mdev->ops->is_fw_ready(mdev)) {
- mic_reset(mdev);
- /*
- * The state will either be MIC_OFFLINE if the reset succeeded
- * or MIC_RESET_FAILED if the firmware reset failed.
- */
- goto retry;
- }
- mdev->dma_mbdev = mbus_register_device(mdev->sdev->parent,
+ mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev,
MBUS_DEV_DMA_HOST, &mic_dma_ops,
- &mbus_hw_ops, mdev->mmio.va);
+ &mbus_hw_ops, id, mdev->mmio.va);
if (IS_ERR(mdev->dma_mbdev)) {
rc = PTR_ERR(mdev->dma_mbdev);
goto unlock_ret;
@@ -401,16 +376,18 @@ retry:
rc = -ENODEV;
goto dma_remove;
}
- mdev->scdev = scif_register_device(mdev->sdev->parent, MIC_SCIF_DEV,
+ mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV,
&__mic_dma_ops, &scif_hw_ops,
- mdev->id + 1, 0, &mdev->mmio,
+ id + 1, 0, &mdev->mmio,
&mdev->aper, mdev->dp, NULL,
- mdev->dma_ch, mdev->num_dma_ch);
+ mdev->dma_ch, mdev->num_dma_ch,
+ true);
if (IS_ERR(mdev->scdev)) {
rc = PTR_ERR(mdev->scdev);
goto dma_free;
}
- rc = mdev->ops->load_mic_fw(mdev, buf);
+
+ rc = mdev->ops->load_mic_fw(mdev, NULL);
if (rc)
goto scif_remove;
mic_smpt_restore(mdev);
@@ -419,7 +396,6 @@ retry:
mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
mdev->ops->send_firmware_intr(mdev);
- mic_set_state(mdev, MIC_ONLINE);
goto unlock_ret;
scif_remove:
scif_unregister_device(mdev->scdev);
@@ -428,198 +404,79 @@ dma_free:
dma_remove:
mbus_unregister_device(mdev->dma_mbdev);
unlock_ret:
- mutex_unlock(&mdev->mic_mutex);
return rc;
}
/**
- * mic_stop - Prepare the MIC for reset and trigger reset.
- * @mdev: pointer to mic_device instance
+ * _mic_stop - Prepare the MIC for reset and trigger reset.
+ * @cdev: pointer to cosm_device instance
* @force: force a MIC to reset even if it is already offline.
*
* RETURNS: None.
*/
-void mic_stop(struct mic_device *mdev, bool force)
-{
- mutex_lock(&mdev->mic_mutex);
- if (MIC_OFFLINE != mdev->state || force) {
- scif_unregister_device(mdev->scdev);
- mic_virtio_reset_devices(mdev);
- mic_free_dma_chans(mdev);
- mbus_unregister_device(mdev->dma_mbdev);
- mic_bootparam_init(mdev);
- mic_reset(mdev);
- if (MIC_RESET_FAILED == mdev->state)
- goto unlock;
- mic_set_shutdown_status(mdev, MIC_NOP);
- if (MIC_SUSPENDED != mdev->state)
- mic_set_state(mdev, MIC_OFFLINE);
- }
-unlock:
- mutex_unlock(&mdev->mic_mutex);
-}
-
-/**
- * mic_shutdown - Initiate MIC shutdown.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_shutdown(struct mic_device *mdev)
+static void _mic_stop(struct cosm_device *cdev, bool force)
{
- struct mic_bootparam *bootparam = mdev->dp;
- s8 db = bootparam->h2c_shutdown_db;
-
- mutex_lock(&mdev->mic_mutex);
- if (MIC_ONLINE == mdev->state && db != -1) {
- bootparam->shutdown_card = 1;
- mdev->ops->send_intr(mdev, db);
- mic_set_state(mdev, MIC_SHUTTING_DOWN);
- }
- mutex_unlock(&mdev->mic_mutex);
-}
-
-/**
- * mic_shutdown_work - Handle shutdown interrupt from MIC.
- * @work: The work structure.
- *
- * This work is scheduled whenever the host has received a shutdown
- * interrupt from the MIC.
- */
-void mic_shutdown_work(struct work_struct *work)
-{
- struct mic_device *mdev = container_of(work, struct mic_device,
- shutdown_work);
- struct mic_bootparam *bootparam = mdev->dp;
-
- mutex_lock(&mdev->mic_mutex);
- mic_set_shutdown_status(mdev, bootparam->shutdown_status);
- bootparam->shutdown_status = 0;
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
/*
- * if state is MIC_SUSPENDED, OSPM suspend is in progress. We do not
- * change the state here so as to prevent users from booting the card
- * during and after the suspend operation.
+ * Since SCIF handles card shutdown and reset (using COSM), it will
+ * will be the first to be registered and the last to be
+ * unregistered.
*/
- if (MIC_SHUTTING_DOWN != mdev->state &&
- MIC_SUSPENDED != mdev->state)
- mic_set_state(mdev, MIC_SHUTTING_DOWN);
- mutex_unlock(&mdev->mic_mutex);
+ mic_virtio_reset_devices(mdev);
+ scif_unregister_device(mdev->scdev);
+ mic_free_dma_chans(mdev);
+ mbus_unregister_device(mdev->dma_mbdev);
+ mic_bootparam_init(mdev);
}
-/**
- * mic_reset_trigger_work - Trigger MIC reset.
- * @work: The work structure.
- *
- * This work is scheduled whenever the host wants to reset the MIC.
- */
-void mic_reset_trigger_work(struct work_struct *work)
+static ssize_t _mic_family(struct cosm_device *cdev, char *buf)
{
- struct mic_device *mdev = container_of(work, struct mic_device,
- reset_trigger_work);
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+ static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" };
- mic_stop(mdev, false);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]);
}
-/**
- * mic_complete_resume - Complete MIC Resume after an OSPM suspend/hibernate
- * event.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_complete_resume(struct mic_device *mdev)
+static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf)
{
- if (mdev->state != MIC_SUSPENDED) {
- dev_warn(mdev->sdev->parent, "state %d should be %d\n",
- mdev->state, MIC_SUSPENDED);
- return;
- }
-
- /* Make sure firmware is ready */
- if (!mdev->ops->is_fw_ready(mdev))
- mic_stop(mdev, true);
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
+ const char *string = "??";
- mutex_lock(&mdev->mic_mutex);
- mic_set_state(mdev, MIC_OFFLINE);
- mutex_unlock(&mdev->mic_mutex);
-}
-
-/**
- * mic_prepare_suspend - Handle suspend notification for the MIC device.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_prepare_suspend(struct mic_device *mdev)
-{
- unsigned long timeout;
-
-#define MIC_SUSPEND_TIMEOUT (60 * HZ)
-
- mutex_lock(&mdev->mic_mutex);
- switch (mdev->state) {
- case MIC_OFFLINE:
- /*
- * Card is already offline. Set state to MIC_SUSPENDED
- * to prevent users from booting the card.
- */
- mic_set_state(mdev, MIC_SUSPENDED);
- mutex_unlock(&mdev->mic_mutex);
+ switch (mdev->stepping) {
+ case MIC_A0_STEP:
+ string = "A0";
break;
- case MIC_ONLINE:
- /*
- * Card is online. Set state to MIC_SUSPENDING and notify
- * MIC user space daemon which will issue card
- * shutdown and reset.
- */
- mic_set_state(mdev, MIC_SUSPENDING);
- mutex_unlock(&mdev->mic_mutex);
- timeout = wait_for_completion_timeout(&mdev->reset_wait,
- MIC_SUSPEND_TIMEOUT);
- /* Force reset the card if the shutdown completion timed out */
- if (!timeout) {
- mutex_lock(&mdev->mic_mutex);
- mic_set_state(mdev, MIC_SUSPENDED);
- mutex_unlock(&mdev->mic_mutex);
- mic_stop(mdev, true);
- }
+ case MIC_B0_STEP:
+ string = "B0";
+ break;
+ case MIC_B1_STEP:
+ string = "B1";
break;
- case MIC_SHUTTING_DOWN:
- /*
- * Card is shutting down. Set state to MIC_SUSPENDED
- * to prevent further boot of the card.
- */
- mic_set_state(mdev, MIC_SUSPENDED);
- mutex_unlock(&mdev->mic_mutex);
- timeout = wait_for_completion_timeout(&mdev->reset_wait,
- MIC_SUSPEND_TIMEOUT);
- /* Force reset the card if the shutdown completion timed out */
- if (!timeout)
- mic_stop(mdev, true);
+ case MIC_C0_STEP:
+ string = "C0";
break;
default:
- mutex_unlock(&mdev->mic_mutex);
break;
}
+ return scnprintf(buf, PAGE_SIZE, "%s\n", string);
}
-/**
- * mic_suspend - Initiate MIC suspend. Suspend merely issues card shutdown.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_suspend(struct mic_device *mdev)
+static struct mic_mw *_mic_aper(struct cosm_device *cdev)
{
- struct mic_bootparam *bootparam = mdev->dp;
- s8 db = bootparam->h2c_shutdown_db;
+ struct mic_device *mdev = cosmdev_to_mdev(cdev);
- mutex_lock(&mdev->mic_mutex);
- if (MIC_SUSPENDING == mdev->state && db != -1) {
- bootparam->shutdown_card = 1;
- mdev->ops->send_intr(mdev, db);
- mic_set_state(mdev, MIC_SUSPENDED);
- }
- mutex_unlock(&mdev->mic_mutex);
+ return &mdev->aper;
}
+
+struct cosm_hw_ops cosm_hw_ops = {
+ .reset = _mic_reset,
+ .force_reset = _mic_reset,
+ .post_reset = NULL,
+ .ready = _mic_ready,
+ .start = _mic_start,
+ .stop = _mic_stop,
+ .family = _mic_family,
+ .stepping = _mic_stepping,
+ .aper = _mic_aper,
+};
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
index 3c9ea4896f3c..10581600777a 100644
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -31,71 +31,6 @@
/* Debugfs parent dir */
static struct dentry *mic_dbg;
-/**
- * mic_log_buf_show - Display MIC kernel log buffer.
- *
- * log_buf addr/len is read from System.map by user space
- * and populated in sysfs entries.
- */
-static int mic_log_buf_show(struct seq_file *s, void *unused)
-{
- void __iomem *log_buf_va;
- int __iomem *log_buf_len_va;
- struct mic_device *mdev = s->private;
- void *kva;
- int size;
- unsigned long aper_offset;
-
- if (!mdev || !mdev->log_buf_addr || !mdev->log_buf_len)
- goto done;
- /*
- * Card kernel will never be relocated and any kernel text/data mapping
- * can be translated to phys address by subtracting __START_KERNEL_map.
- */
- aper_offset = (unsigned long)mdev->log_buf_len - __START_KERNEL_map;
- log_buf_len_va = mdev->aper.va + aper_offset;
- aper_offset = (unsigned long)mdev->log_buf_addr - __START_KERNEL_map;
- log_buf_va = mdev->aper.va + aper_offset;
- size = ioread32(log_buf_len_va);
-
- kva = kmalloc(size, GFP_KERNEL);
- if (!kva)
- goto done;
- mutex_lock(&mdev->mic_mutex);
- memcpy_fromio(kva, log_buf_va, size);
- switch (mdev->state) {
- case MIC_ONLINE:
- /* Fall through */
- case MIC_SHUTTING_DOWN:
- seq_write(s, kva, size);
- break;
- default:
- break;
- }
- mutex_unlock(&mdev->mic_mutex);
- kfree(kva);
-done:
- return 0;
-}
-
-static int mic_log_buf_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mic_log_buf_show, inode->i_private);
-}
-
-static int mic_log_buf_release(struct inode *inode, struct file *file)
-{
- return single_release(inode, file);
-}
-
-static const struct file_operations log_buf_ops = {
- .owner = THIS_MODULE,
- .open = mic_log_buf_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = mic_log_buf_release
-};
-
static int mic_smpt_show(struct seq_file *s, void *pos)
{
int i;
@@ -138,32 +73,6 @@ static const struct file_operations smpt_file_ops = {
.release = mic_smpt_debug_release
};
-static int mic_soft_reset_show(struct seq_file *s, void *pos)
-{
- struct mic_device *mdev = s->private;
-
- mic_stop(mdev, true);
- return 0;
-}
-
-static int mic_soft_reset_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, mic_soft_reset_show, inode->i_private);
-}
-
-static int mic_soft_reset_debug_release(struct inode *inode, struct file *file)
-{
- return single_release(inode, file);
-}
-
-static const struct file_operations soft_reset_ops = {
- .owner = THIS_MODULE,
- .open = mic_soft_reset_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = mic_soft_reset_debug_release
-};
-
static int mic_post_code_show(struct seq_file *s, void *pos)
{
struct mic_device *mdev = s->private;
@@ -204,18 +113,8 @@ static int mic_dp_show(struct seq_file *s, void *pos)
seq_printf(s, "Bootparam: magic 0x%x\n",
bootparam->magic);
- seq_printf(s, "Bootparam: h2c_shutdown_db %d\n",
- bootparam->h2c_shutdown_db);
seq_printf(s, "Bootparam: h2c_config_db %d\n",
bootparam->h2c_config_db);
- seq_printf(s, "Bootparam: c2h_shutdown_db %d\n",
- bootparam->c2h_shutdown_db);
- seq_printf(s, "Bootparam: shutdown_status %d\n",
- bootparam->shutdown_status);
- seq_printf(s, "Bootparam: shutdown_card %d\n",
- bootparam->shutdown_card);
- seq_printf(s, "Bootparam: tot_nodes %d\n",
- bootparam->tot_nodes);
seq_printf(s, "Bootparam: node_id %d\n",
bootparam->node_id);
seq_printf(s, "Bootparam: c2h_scif_db %d\n",
@@ -392,8 +291,7 @@ static int mic_msi_irq_info_show(struct seq_file *s, void *pos)
int i, j;
u16 entry;
u16 vector;
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
if (pci_dev_msi_enabled(pdev)) {
for (i = 0; i < mdev->irq_info.num_vectors; i++) {
@@ -454,20 +352,18 @@ static const struct file_operations msi_irq_info_ops = {
*/
void mic_create_debug_dir(struct mic_device *mdev)
{
+ char name[16];
+
if (!mic_dbg)
return;
- mdev->dbg_dir = debugfs_create_dir(dev_name(mdev->sdev), mic_dbg);
+ scnprintf(name, sizeof(name), "mic%d", mdev->id);
+ mdev->dbg_dir = debugfs_create_dir(name, mic_dbg);
if (!mdev->dbg_dir)
return;
- debugfs_create_file("log_buf", 0444, mdev->dbg_dir, mdev, &log_buf_ops);
-
debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops);
- debugfs_create_file("soft_reset", 0444, mdev->dbg_dir, mdev,
- &soft_reset_ops);
-
debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev,
&post_code_ops);
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
index 01a7555aa648..461184a12fbb 100644
--- a/drivers/misc/mic/host/mic_device.h
+++ b/drivers/misc/mic/host/mic_device.h
@@ -26,21 +26,12 @@
#include <linux/notifier.h>
#include <linux/irqreturn.h>
#include <linux/dmaengine.h>
+#include <linux/miscdevice.h>
#include <linux/mic_bus.h>
#include "../bus/scif_bus.h"
+#include "../bus/cosm_bus.h"
#include "mic_intr.h"
-/* The maximum number of MIC devices supported in a single host system. */
-#define MIC_MAX_NUM_DEVS 256
-
-/**
- * enum mic_hw_family - The hardware family to which a device belongs.
- */
-enum mic_hw_family {
- MIC_FAMILY_X100 = 0,
- MIC_FAMILY_UNKNOWN
-};
-
/**
* enum mic_stepping - MIC stepping ids.
*/
@@ -51,6 +42,8 @@ enum mic_stepping {
MIC_C0_STEP = 0x20,
};
+extern struct cosm_hw_ops cosm_hw_ops;
+
/**
* struct mic_device - MIC device information for each card.
*
@@ -60,8 +53,7 @@ enum mic_stepping {
* @ops: MIC HW specific operations.
* @id: The unique device id for this MIC device.
* @stepping: Stepping ID.
- * @attr_group: Pointer to list of sysfs attribute groups.
- * @sdev: Device for sysfs entries.
+ * @pdev: Underlying PCI device.
* @mic_mutex: Mutex for synchronizing access to mic_device.
* @intr_ops: HW specific interrupt operations.
* @smpt_ops: Hardware specific SMPT operations.
@@ -69,30 +61,17 @@ enum mic_stepping {
* @intr_info: H/W specific interrupt information.
* @irq_info: The OS specific irq information
* @dbg_dir: debugfs directory of this MIC device.
- * @cmdline: Kernel command line.
- * @firmware: Firmware file name.
- * @ramdisk: Ramdisk file name.
- * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
* @bootaddr: MIC boot address.
- * @reset_trigger_work: Work for triggering reset requests.
- * @shutdown_work: Work for handling shutdown interrupts.
- * @state: MIC state.
- * @shutdown_status: MIC status reported by card for shutdown/crashes.
- * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
- * @reset_wait: Waitqueue for sleeping while reset completes.
- * @log_buf_addr: Log buffer address for MIC.
- * @log_buf_len: Log buffer length address for MIC.
* @dp: virtio device page
* @dp_dma_addr: virtio device page DMA address.
- * @shutdown_db: shutdown doorbell.
- * @shutdown_cookie: shutdown cookie.
- * @cdev: Character device for MIC.
+ * @name: name for the misc char device
+ * @miscdev: registered misc char device
* @vdev_list: list of virtio devices.
- * @pm_notifier: Handles PM notifications from the OS.
* @dma_mbdev: MIC BUS DMA device.
* @dma_ch - Array of DMA channels
* @num_dma_ch - Number of DMA channels available
* @scdev: SCIF device on the SCIF virtual bus.
+ * @cosm_dev: COSM device
*/
struct mic_device {
struct mic_mw mmio;
@@ -101,8 +80,7 @@ struct mic_device {
struct mic_hw_ops *ops;
int id;
enum mic_stepping stepping;
- const struct attribute_group **attr_group;
- struct device *sdev;
+ struct pci_dev *pdev;
struct mutex mic_mutex;
struct mic_hw_intr_ops *intr_ops;
struct mic_smpt_ops *smpt_ops;
@@ -110,30 +88,17 @@ struct mic_device {
struct mic_intr_info *intr_info;
struct mic_irq_info irq_info;
struct dentry *dbg_dir;
- char *cmdline;
- char *firmware;
- char *ramdisk;
- char *bootmode;
u32 bootaddr;
- struct work_struct reset_trigger_work;
- struct work_struct shutdown_work;
- u8 state;
- u8 shutdown_status;
- struct kernfs_node *state_sysfs;
- struct completion reset_wait;
- void *log_buf_addr;
- int *log_buf_len;
void *dp;
dma_addr_t dp_dma_addr;
- int shutdown_db;
- struct mic_irq *shutdown_cookie;
- struct cdev cdev;
+ char name[16];
+ struct miscdevice miscdev;
struct list_head vdev_list;
- struct notifier_block pm_notifier;
struct mbus_device *dma_mbdev;
struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
int num_dma_ch;
struct scif_hw_dev *scdev;
+ struct cosm_device *cosm_dev;
};
/**
@@ -199,38 +164,9 @@ mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
iowrite32(val, mw->va + offset);
}
-static inline struct dma_chan *mic_request_dma_chan(struct mic_device *mdev)
-{
- dma_cap_mask_t mask;
- struct dma_chan *chan;
-
- dma_cap_zero(mask);
- dma_cap_set(DMA_MEMCPY, mask);
- chan = dma_request_channel(mask, mdev->ops->dma_filter,
- mdev->sdev->parent);
- if (chan)
- return chan;
- dev_err(mdev->sdev->parent, "%s %d unable to acquire channel\n",
- __func__, __LINE__);
- return NULL;
-}
-
-void mic_sysfs_init(struct mic_device *mdev);
-int mic_start(struct mic_device *mdev, const char *buf);
-void mic_stop(struct mic_device *mdev, bool force);
-void mic_shutdown(struct mic_device *mdev);
-void mic_reset_delayed_work(struct work_struct *work);
-void mic_reset_trigger_work(struct work_struct *work);
-void mic_shutdown_work(struct work_struct *work);
void mic_bootparam_init(struct mic_device *mdev);
-void mic_set_state(struct mic_device *mdev, u8 state);
-void mic_set_shutdown_status(struct mic_device *mdev, u8 status);
void mic_create_debug_dir(struct mic_device *dev);
void mic_delete_debug_dir(struct mic_device *dev);
void __init mic_init_debugfs(void);
void mic_exit_debugfs(void);
-void mic_prepare_suspend(struct mic_device *mdev);
-void mic_complete_resume(struct mic_device *mdev);
-void mic_suspend(struct mic_device *mdev);
-extern atomic_t g_num_mics;
#endif
diff --git a/drivers/misc/mic/host/mic_fops.c b/drivers/misc/mic/host/mic_fops.c
index 85776d7327f3..8cc1d90cd949 100644
--- a/drivers/misc/mic/host/mic_fops.c
+++ b/drivers/misc/mic/host/mic_fops.c
@@ -30,8 +30,8 @@
int mic_open(struct inode *inode, struct file *f)
{
struct mic_vdev *mvdev;
- struct mic_device *mdev = container_of(inode->i_cdev,
- struct mic_device, cdev);
+ struct mic_device *mdev = container_of(f->private_data,
+ struct mic_device, miscdev);
mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
if (!mvdev)
diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c
index b4ca6c884d19..08ca3e372fa4 100644
--- a/drivers/misc/mic/host/mic_intr.c
+++ b/drivers/misc/mic/host/mic_intr.c
@@ -30,8 +30,7 @@ static irqreturn_t mic_thread_fn(int irq, void *dev)
struct mic_intr_info *intr_info = mdev->intr_info;
struct mic_irq_info *irq_info = &mdev->irq_info;
struct mic_intr_cb *intr_cb;
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
int i;
spin_lock(&irq_info->mic_thread_lock);
@@ -57,8 +56,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev)
struct mic_intr_info *intr_info = mdev->intr_info;
struct mic_irq_info *irq_info = &mdev->irq_info;
struct mic_intr_cb *intr_cb;
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
u32 mask;
int i;
@@ -83,7 +81,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev)
/* Return the interrupt offset from the index. Index is 0 based. */
static u16 mic_map_src_to_offset(struct mic_device *mdev,
- int intr_src, enum mic_intr_type type)
+ int intr_src, enum mic_intr_type type)
{
if (type >= MIC_NUM_INTR_TYPES)
return MIC_NUM_OFFSETS;
@@ -214,7 +212,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
mdev->irq_info.msix_entries[i].entry = i;
rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries,
- MIC_MIN_MSIX);
+ MIC_MIN_MSIX);
if (rc) {
dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc);
goto err_enable_msix;
@@ -229,7 +227,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
goto err_nomem2;
}
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"%d MSIx irqs setup\n", mdev->irq_info.num_vectors);
return 0;
err_nomem2:
@@ -281,7 +279,6 @@ static void mic_release_callbacks(struct mic_device *mdev)
spin_lock(&mdev->irq_info.mic_thread_lock);
spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
for (i = 0; i < MIC_NUM_OFFSETS; i++) {
-
if (list_empty(&mdev->irq_info.cb_list[i]))
break;
@@ -443,12 +440,11 @@ mic_request_threaded_irq(struct mic_device *mdev,
unsigned long cookie = 0;
u16 entry;
struct mic_intr_cb *intr_cb;
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
offset = mic_map_src_to_offset(mdev, intr_src, type);
if (offset >= MIC_NUM_OFFSETS) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"Error mapping index %d to a valid source id.\n",
intr_src);
rc = -EINVAL;
@@ -458,7 +454,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
if (mdev->irq_info.num_vectors > 1) {
msix = mic_get_available_vector(mdev);
if (!msix) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"No MSIx vectors available for use.\n");
rc = -ENOSPC;
goto err;
@@ -467,7 +463,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
rc = request_threaded_irq(msix->vector, handler, thread_fn,
0, name, data);
if (rc) {
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"request irq failed rc = %d\n", rc);
goto err;
}
@@ -476,13 +472,13 @@ mic_request_threaded_irq(struct mic_device *mdev,
mdev->intr_ops->program_msi_to_src_map(mdev,
entry, offset, true);
cookie = MK_COOKIE(entry, offset);
- dev_dbg(mdev->sdev->parent, "irq: %d assigned for src: %d\n",
+ dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n",
msix->vector, intr_src);
} else {
intr_cb = mic_register_intr_callback(mdev, offset, handler,
thread_fn, data);
if (IS_ERR(intr_cb)) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"No available callback entries for use\n");
rc = PTR_ERR(intr_cb);
goto err;
@@ -495,7 +491,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
entry, offset, true);
}
cookie = MK_COOKIE(entry, intr_cb->cb_id);
- dev_dbg(mdev->sdev->parent, "callback %d registered for src: %d\n",
+ dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n",
intr_cb->cb_id, intr_src);
}
return (struct mic_irq *)cookie;
@@ -515,20 +511,19 @@ err:
* returns: none.
*/
void mic_free_irq(struct mic_device *mdev,
- struct mic_irq *cookie, void *data)
+ struct mic_irq *cookie, void *data)
{
u32 offset;
u32 entry;
u8 src_id;
unsigned int irq;
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
entry = GET_ENTRY((unsigned long)cookie);
offset = GET_OFFSET((unsigned long)cookie);
if (mdev->irq_info.num_vectors > 1) {
if (entry >= mdev->irq_info.num_vectors) {
- dev_warn(mdev->sdev->parent,
+ dev_warn(&mdev->pdev->dev,
"entry %d should be < num_irq %d\n",
entry, mdev->irq_info.num_vectors);
return;
@@ -539,12 +534,12 @@ void mic_free_irq(struct mic_device *mdev,
mdev->intr_ops->program_msi_to_src_map(mdev,
entry, offset, false);
- dev_dbg(mdev->sdev->parent, "irq: %d freed\n", irq);
+ dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq);
} else {
irq = pdev->irq;
src_id = mic_unregister_intr_callback(mdev, offset);
if (src_id >= MIC_NUM_OFFSETS) {
- dev_warn(mdev->sdev->parent, "Error unregistering callback\n");
+ dev_warn(&mdev->pdev->dev, "Error unregistering callback\n");
return;
}
if (pci_dev_msi_enabled(pdev)) {
@@ -552,7 +547,7 @@ void mic_free_irq(struct mic_device *mdev,
mdev->intr_ops->program_msi_to_src_map(mdev,
entry, src_id, false);
}
- dev_dbg(mdev->sdev->parent, "callback %d unregistered for src: %d\n",
+ dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n",
offset, src_id);
}
}
@@ -579,7 +574,7 @@ int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
rc = mic_setup_intx(mdev, pdev);
if (rc) {
- dev_err(mdev->sdev->parent, "no usable interrupts\n");
+ dev_err(&mdev->pdev->dev, "no usable interrupts\n");
return rc;
}
done:
@@ -635,8 +630,7 @@ void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
void mic_intr_restore(struct mic_device *mdev)
{
int entry, offset;
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
if (!pci_dev_msi_enabled(pdev))
return;
diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
index 456462932151..153894e7ed5b 100644
--- a/drivers/misc/mic/host/mic_main.c
+++ b/drivers/misc/mic/host/mic_main.c
@@ -16,17 +16,11 @@
* the file called "COPYING".
*
* Intel MIC Host driver.
- *
- * Global TODO's across the driver to be added after initial base
- * patches are accepted upstream:
- * 1) Enable DMA support.
- * 2) Enable per vring interrupt support.
*/
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/poll.h>
-#include <linux/suspend.h>
#include <linux/mic_common.h>
#include "../common/mic_dev.h"
@@ -63,12 +57,8 @@ MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
/* ID allocator for MIC devices */
static struct ida g_mic_ida;
-/* Class of MIC devices for sysfs accessibility. */
-static struct class *g_mic_class;
/* Base device node number for MIC devices */
static dev_t g_mic_devno;
-/* Track the total number of MIC devices */
-atomic_t g_num_mics;
static const struct file_operations mic_fops = {
.open = mic_open,
@@ -83,17 +73,14 @@ static const struct file_operations mic_fops = {
static int mic_dp_init(struct mic_device *mdev)
{
mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
- if (!mdev->dp) {
- dev_err(mdev->sdev->parent, "%s %d err %d\n",
- __func__, __LINE__, -ENOMEM);
+ if (!mdev->dp)
return -ENOMEM;
- }
mdev->dp_dma_addr = mic_map_single(mdev,
mdev->dp, MIC_DP_SIZE);
if (mic_map_error(mdev->dp_dma_addr)) {
kfree(mdev->dp);
- dev_err(mdev->sdev->parent, "%s %d err %d\n",
+ dev_err(&mdev->pdev->dev, "%s %d err %d\n",
__func__, __LINE__, -ENOMEM);
return -ENOMEM;
}
@@ -110,30 +97,6 @@ static void mic_dp_uninit(struct mic_device *mdev)
}
/**
- * mic_shutdown_db - Shutdown doorbell interrupt handler.
- */
-static irqreturn_t mic_shutdown_db(int irq, void *data)
-{
- struct mic_device *mdev = data;
- struct mic_bootparam *bootparam = mdev->dp;
-
- mdev->ops->intr_workarounds(mdev);
-
- switch (bootparam->shutdown_status) {
- case MIC_HALTED:
- case MIC_POWER_OFF:
- case MIC_RESTART:
- /* Fall through */
- case MIC_CRASHED:
- schedule_work(&mdev->shutdown_work);
- break;
- default:
- break;
- };
- return IRQ_HANDLED;
-}
-
-/**
* mic_ops_init: Initialize HW specific operation tables.
*
* @mdev: pointer to mic_device instance
@@ -190,43 +153,6 @@ static enum mic_hw_family mic_get_family(struct pci_dev *pdev)
}
/**
-* mic_pm_notifier: Notifier callback function that handles
-* PM notifications.
-*
-* @notifier_block: The notifier structure.
-* @pm_event: The event for which the driver was notified.
-* @unused: Meaningless. Always NULL.
-*
-* returns NOTIFY_DONE
-*/
-static int mic_pm_notifier(struct notifier_block *notifier,
- unsigned long pm_event, void *unused)
-{
- struct mic_device *mdev = container_of(notifier,
- struct mic_device, pm_notifier);
-
- switch (pm_event) {
- case PM_HIBERNATION_PREPARE:
- /* Fall through */
- case PM_SUSPEND_PREPARE:
- mic_prepare_suspend(mdev);
- break;
- case PM_POST_HIBERNATION:
- /* Fall through */
- case PM_POST_SUSPEND:
- /* Fall through */
- case PM_POST_RESTORE:
- mic_complete_resume(mdev);
- break;
- case PM_RESTORE_PREPARE:
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
* mic_device_init - Allocates and initializes the MIC device structure
*
* @mdev: pointer to mic_device instance
@@ -234,52 +160,16 @@ static int mic_pm_notifier(struct notifier_block *notifier,
*
* returns none.
*/
-static int
+static void
mic_device_init(struct mic_device *mdev, struct pci_dev *pdev)
{
- int rc;
-
+ mdev->pdev = pdev;
mdev->family = mic_get_family(pdev);
mdev->stepping = pdev->revision;
mic_ops_init(mdev);
- mic_sysfs_init(mdev);
mutex_init(&mdev->mic_mutex);
mdev->irq_info.next_avail_src = 0;
- INIT_WORK(&mdev->reset_trigger_work, mic_reset_trigger_work);
- INIT_WORK(&mdev->shutdown_work, mic_shutdown_work);
- init_completion(&mdev->reset_wait);
INIT_LIST_HEAD(&mdev->vdev_list);
- mdev->pm_notifier.notifier_call = mic_pm_notifier;
- rc = register_pm_notifier(&mdev->pm_notifier);
- if (rc) {
- dev_err(&pdev->dev, "register_pm_notifier failed rc %d\n",
- rc);
- goto register_pm_notifier_fail;
- }
- return 0;
-register_pm_notifier_fail:
- flush_work(&mdev->shutdown_work);
- flush_work(&mdev->reset_trigger_work);
- return rc;
-}
-
-/**
- * mic_device_uninit - Frees resources allocated during mic_device_init(..)
- *
- * @mdev: pointer to mic_device instance
- *
- * returns none
- */
-static void mic_device_uninit(struct mic_device *mdev)
-{
- /* The cmdline sysfs entry might have allocated cmdline */
- kfree(mdev->cmdline);
- kfree(mdev->firmware);
- kfree(mdev->ramdisk);
- kfree(mdev->bootmode);
- flush_work(&mdev->reset_trigger_work);
- flush_work(&mdev->shutdown_work);
- unregister_pm_notifier(&mdev->pm_notifier);
}
/**
@@ -291,7 +181,7 @@ static void mic_device_uninit(struct mic_device *mdev)
* returns 0 on success, < 0 on failure.
*/
static int mic_probe(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+ const struct pci_device_id *ent)
{
int rc;
struct mic_device *mdev;
@@ -309,16 +199,12 @@ static int mic_probe(struct pci_dev *pdev,
goto ida_fail;
}
- rc = mic_device_init(mdev, pdev);
- if (rc) {
- dev_err(&pdev->dev, "mic_device_init failed rc %d\n", rc);
- goto device_init_fail;
- }
+ mic_device_init(mdev, pdev);
rc = pci_enable_device(pdev);
if (rc) {
dev_err(&pdev->dev, "failed to enable pci device.\n");
- goto uninit_device;
+ goto ida_remove;
}
pci_set_master(pdev);
@@ -367,62 +253,39 @@ static int mic_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, mdev);
- mdev->sdev = device_create_with_groups(g_mic_class, &pdev->dev,
- MKDEV(MAJOR(g_mic_devno), mdev->id), NULL,
- mdev->attr_group, "mic%d", mdev->id);
- if (IS_ERR(mdev->sdev)) {
- rc = PTR_ERR(mdev->sdev);
- dev_err(&pdev->dev,
- "device_create_with_groups failed rc %d\n", rc);
- goto smpt_uninit;
- }
- mdev->state_sysfs = sysfs_get_dirent(mdev->sdev->kobj.sd, "state");
- if (!mdev->state_sysfs) {
- rc = -ENODEV;
- dev_err(&pdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
- goto destroy_device;
- }
-
rc = mic_dp_init(mdev);
if (rc) {
dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
- goto sysfs_put;
- }
- mutex_lock(&mdev->mic_mutex);
-
- mdev->shutdown_db = mic_next_db(mdev);
- mdev->shutdown_cookie = mic_request_threaded_irq(mdev, mic_shutdown_db,
- NULL, "shutdown-interrupt", mdev,
- mdev->shutdown_db, MIC_INTR_DB);
- if (IS_ERR(mdev->shutdown_cookie)) {
- rc = PTR_ERR(mdev->shutdown_cookie);
- mutex_unlock(&mdev->mic_mutex);
- goto dp_uninit;
+ goto smpt_uninit;
}
- mutex_unlock(&mdev->mic_mutex);
mic_bootparam_init(mdev);
mic_create_debug_dir(mdev);
- cdev_init(&mdev->cdev, &mic_fops);
- mdev->cdev.owner = THIS_MODULE;
- rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic_devno), mdev->id), 1);
+
+ mdev->miscdev.minor = MISC_DYNAMIC_MINOR;
+ snprintf(mdev->name, sizeof(mdev->name), "mic%d", mdev->id);
+ mdev->miscdev.name = mdev->name;
+ mdev->miscdev.fops = &mic_fops;
+ mdev->miscdev.parent = &mdev->pdev->dev;
+ rc = misc_register(&mdev->miscdev);
if (rc) {
- dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc);
+ dev_err(&pdev->dev, "misc_register err id %d rc %d\n",
+ mdev->id, rc);
goto cleanup_debug_dir;
}
- atomic_inc(&g_num_mics);
+
+ mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops);
+ if (IS_ERR(mdev->cosm_dev)) {
+ rc = PTR_ERR(mdev->cosm_dev);
+ dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc);
+ goto misc_dereg;
+ }
return 0;
+misc_dereg:
+ misc_deregister(&mdev->miscdev);
cleanup_debug_dir:
mic_delete_debug_dir(mdev);
- mutex_lock(&mdev->mic_mutex);
- mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
- mutex_unlock(&mdev->mic_mutex);
-dp_uninit:
mic_dp_uninit(mdev);
-sysfs_put:
- sysfs_put(mdev->state_sysfs);
-destroy_device:
- device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id));
smpt_uninit:
mic_smpt_uninit(mdev);
free_interrupts:
@@ -435,9 +298,7 @@ release_regions:
pci_release_regions(pdev);
disable_device:
pci_disable_device(pdev);
-uninit_device:
- mic_device_uninit(mdev);
-device_init_fail:
+ida_remove:
ida_simple_remove(&g_mic_ida, mdev->id);
ida_fail:
kfree(mdev);
@@ -461,22 +322,14 @@ static void mic_remove(struct pci_dev *pdev)
if (!mdev)
return;
- mic_stop(mdev, false);
- atomic_dec(&g_num_mics);
- cdev_del(&mdev->cdev);
+ cosm_unregister_device(mdev->cosm_dev);
+ misc_deregister(&mdev->miscdev);
mic_delete_debug_dir(mdev);
- mutex_lock(&mdev->mic_mutex);
- mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
- mutex_unlock(&mdev->mic_mutex);
- flush_work(&mdev->shutdown_work);
mic_dp_uninit(mdev);
- sysfs_put(mdev->state_sysfs);
- device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id));
mic_smpt_uninit(mdev);
mic_free_interrupts(mdev, pdev);
- iounmap(mdev->mmio.va);
iounmap(mdev->aper.va);
- mic_device_uninit(mdev);
+ iounmap(mdev->mmio.va);
pci_release_regions(pdev);
pci_disable_device(pdev);
ida_simple_remove(&g_mic_ida, mdev->id);
@@ -495,32 +348,23 @@ static int __init mic_init(void)
int ret;
ret = alloc_chrdev_region(&g_mic_devno, 0,
- MIC_MAX_NUM_DEVS, mic_driver_name);
+ MIC_MAX_NUM_DEVS, mic_driver_name);
if (ret) {
pr_err("alloc_chrdev_region failed ret %d\n", ret);
goto error;
}
- g_mic_class = class_create(THIS_MODULE, mic_driver_name);
- if (IS_ERR(g_mic_class)) {
- ret = PTR_ERR(g_mic_class);
- pr_err("class_create failed ret %d\n", ret);
- goto cleanup_chrdev;
- }
-
mic_init_debugfs();
ida_init(&g_mic_ida);
ret = pci_register_driver(&mic_driver);
if (ret) {
pr_err("pci_register_driver failed ret %d\n", ret);
- goto cleanup_debugfs;
+ goto cleanup_chrdev;
}
return ret;
-cleanup_debugfs:
+cleanup_chrdev:
ida_destroy(&g_mic_ida);
mic_exit_debugfs();
- class_destroy(g_mic_class);
-cleanup_chrdev:
unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
error:
return ret;
@@ -531,7 +375,6 @@ static void __exit mic_exit(void)
pci_unregister_driver(&mic_driver);
ida_destroy(&g_mic_ida);
mic_exit_debugfs();
- class_destroy(g_mic_class);
unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
}
diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c
index cec82034875f..c3f958580fb0 100644
--- a/drivers/misc/mic/host/mic_smpt.c
+++ b/drivers/misc/mic/host/mic_smpt.c
@@ -76,7 +76,7 @@ mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa)
/* Populate an SMPT entry and update the reference counts. */
static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
- int entries, struct mic_device *mdev)
+ int entries, struct mic_device *mdev)
{
struct mic_smpt_info *smpt_info = mdev->smpt;
int i;
@@ -97,7 +97,7 @@ static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
* for a given DMA address and size.
*/
static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr,
- int entries, s64 *ref, size_t size)
+ int entries, s64 *ref, size_t size)
{
int spt;
int ae = 0;
@@ -148,7 +148,7 @@ found:
* and the starting smpt address
*/
static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
- size_t size, s64 *ref, u64 *smpt_start)
+ size_t size, s64 *ref, u64 *smpt_start)
{
u64 start = dma_addr;
u64 end = dma_addr + size;
@@ -181,7 +181,7 @@ dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
dma_addr_t dma_addr;
if (!mic_is_system_addr(mdev, mic_addr)) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr);
return -EINVAL;
}
@@ -218,7 +218,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
return mic_addr;
num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size,
- ref, &smpt_start);
+ ref, &smpt_start);
/* Set the smpt table appropriately and get 16G aligned mic address */
mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size);
@@ -231,7 +231,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
* else generate mic_addr by adding the 16G offset in dma_addr
*/
if (!mic_addr && MIC_FAMILY_X100 == mdev->family) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"mic_map failed dma_addr 0x%llx size 0x%lx\n",
dma_addr, size);
return mic_addr;
@@ -264,7 +264,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
return;
if (!mic_is_system_addr(mdev, mic_addr)) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"invalid address: 0x%llx\n", mic_addr);
return;
}
@@ -284,7 +284,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
for (i = spt; i < spt + num_smpt; i++) {
smpt_info->entry[i].ref_count -= ref[i - spt];
if (smpt_info->entry[i].ref_count < 0)
- dev_warn(mdev->sdev->parent,
+ dev_warn(&mdev->pdev->dev,
"ref count for entry %d is negative\n", i);
}
spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
@@ -307,15 +307,14 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
{
dma_addr_t mic_addr = 0;
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
dma_addr_t dma_addr =
pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL);
if (!pci_dma_mapping_error(pdev, dma_addr)) {
mic_addr = mic_map(mdev, dma_addr, size);
if (!mic_addr) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"mic_map failed dma_addr 0x%llx size 0x%lx\n",
dma_addr, size);
pci_unmap_single(pdev, dma_addr,
@@ -339,8 +338,7 @@ dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
void
mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
{
- struct pci_dev *pdev = container_of(mdev->sdev->parent,
- struct pci_dev, dev);
+ struct pci_dev *pdev = mdev->pdev;
dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr);
mic_unmap(mdev, mic_addr, size);
pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
@@ -399,18 +397,18 @@ void mic_smpt_uninit(struct mic_device *mdev)
struct mic_smpt_info *smpt_info = mdev->smpt;
int i;
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"nodeid %d SMPT ref count %lld map %lld unmap %lld\n",
mdev->id, smpt_info->ref_count,
smpt_info->map_count, smpt_info->unmap_count);
for (i = 0; i < smpt_info->info.num_reg; i++) {
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n",
i, smpt_info->entry[i].dma_addr,
smpt_info->entry[i].ref_count);
if (smpt_info->entry[i].ref_count)
- dev_warn(mdev->sdev->parent,
+ dev_warn(&mdev->pdev->dev,
"ref count for entry %d is not zero\n", i);
}
kfree(smpt_info->entry);
diff --git a/drivers/misc/mic/host/mic_sysfs.c b/drivers/misc/mic/host/mic_sysfs.c
deleted file mode 100644
index 6dd864e4a617..000000000000
--- a/drivers/misc/mic/host/mic_sysfs.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Intel MIC Host driver.
- *
- */
-#include <linux/pci.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-
-/*
- * A state-to-string lookup table, for exposing a human readable state
- * via sysfs. Always keep in sync with enum mic_states
- */
-static const char * const mic_state_string[] = {
- [MIC_OFFLINE] = "offline",
- [MIC_ONLINE] = "online",
- [MIC_SHUTTING_DOWN] = "shutting_down",
- [MIC_RESET_FAILED] = "reset_failed",
- [MIC_SUSPENDING] = "suspending",
- [MIC_SUSPENDED] = "suspended",
-};
-
-/*
- * A shutdown-status-to-string lookup table, for exposing a human
- * readable state via sysfs. Always keep in sync with enum mic_shutdown_status
- */
-static const char * const mic_shutdown_status_string[] = {
- [MIC_NOP] = "nop",
- [MIC_CRASHED] = "crashed",
- [MIC_HALTED] = "halted",
- [MIC_POWER_OFF] = "poweroff",
- [MIC_RESTART] = "restart",
-};
-
-void mic_set_shutdown_status(struct mic_device *mdev, u8 shutdown_status)
-{
- dev_dbg(mdev->sdev->parent, "Shutdown Status %s -> %s\n",
- mic_shutdown_status_string[mdev->shutdown_status],
- mic_shutdown_status_string[shutdown_status]);
- mdev->shutdown_status = shutdown_status;
-}
-
-void mic_set_state(struct mic_device *mdev, u8 state)
-{
- dev_dbg(mdev->sdev->parent, "State %s -> %s\n",
- mic_state_string[mdev->state],
- mic_state_string[state]);
- mdev->state = state;
- sysfs_notify_dirent(mdev->state_sysfs);
-}
-
-static ssize_t
-family_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- static const char x100[] = "x100";
- static const char unknown[] = "Unknown";
- const char *card = NULL;
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev)
- return -EINVAL;
-
- switch (mdev->family) {
- case MIC_FAMILY_X100:
- card = x100;
- break;
- default:
- card = unknown;
- break;
- }
- return scnprintf(buf, PAGE_SIZE, "%s\n", card);
-}
-static DEVICE_ATTR_RO(family);
-
-static ssize_t
-stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- char *string = "??";
-
- if (!mdev)
- return -EINVAL;
-
- switch (mdev->stepping) {
- case MIC_A0_STEP:
- string = "A0";
- break;
- case MIC_B0_STEP:
- string = "B0";
- break;
- case MIC_B1_STEP:
- string = "B1";
- break;
- case MIC_C0_STEP:
- string = "C0";
- break;
- default:
- break;
- }
- return scnprintf(buf, PAGE_SIZE, "%s\n", string);
-}
-static DEVICE_ATTR_RO(stepping);
-
-static ssize_t
-state_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev || mdev->state >= MIC_LAST)
- return -EINVAL;
-
- return scnprintf(buf, PAGE_SIZE, "%s\n",
- mic_state_string[mdev->state]);
-}
-
-static ssize_t
-state_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- int rc = 0;
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- if (!mdev)
- return -EINVAL;
- if (sysfs_streq(buf, "boot")) {
- rc = mic_start(mdev, buf);
- if (rc) {
- dev_err(mdev->sdev->parent,
- "mic_boot failed rc %d\n", rc);
- count = rc;
- }
- goto done;
- }
-
- if (sysfs_streq(buf, "reset")) {
- schedule_work(&mdev->reset_trigger_work);
- goto done;
- }
-
- if (sysfs_streq(buf, "shutdown")) {
- mic_shutdown(mdev);
- goto done;
- }
-
- if (sysfs_streq(buf, "suspend")) {
- mic_suspend(mdev);
- goto done;
- }
-
- count = -EINVAL;
-done:
- return count;
-}
-static DEVICE_ATTR_RW(state);
-
-static ssize_t shutdown_status_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev || mdev->shutdown_status >= MIC_STATUS_LAST)
- return -EINVAL;
-
- return scnprintf(buf, PAGE_SIZE, "%s\n",
- mic_shutdown_status_string[mdev->shutdown_status]);
-}
-static DEVICE_ATTR_RO(shutdown_status);
-
-static ssize_t
-cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- char *cmdline;
-
- if (!mdev)
- return -EINVAL;
-
- cmdline = mdev->cmdline;
-
- if (cmdline)
- return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
- return 0;
-}
-
-static ssize_t
-cmdline_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev)
- return -EINVAL;
-
- mutex_lock(&mdev->mic_mutex);
- kfree(mdev->cmdline);
-
- mdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
- if (!mdev->cmdline) {
- count = -ENOMEM;
- goto unlock;
- }
-
- strncpy(mdev->cmdline, buf, count);
-
- if (mdev->cmdline[count - 1] == '\n')
- mdev->cmdline[count - 1] = '\0';
- else
- mdev->cmdline[count] = '\0';
-unlock:
- mutex_unlock(&mdev->mic_mutex);
- return count;
-}
-static DEVICE_ATTR_RW(cmdline);
-
-static ssize_t
-firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- char *firmware;
-
- if (!mdev)
- return -EINVAL;
-
- firmware = mdev->firmware;
-
- if (firmware)
- return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
- return 0;
-}
-
-static ssize_t
-firmware_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev)
- return -EINVAL;
-
- mutex_lock(&mdev->mic_mutex);
- kfree(mdev->firmware);
-
- mdev->firmware = kmalloc(count + 1, GFP_KERNEL);
- if (!mdev->firmware) {
- count = -ENOMEM;
- goto unlock;
- }
- strncpy(mdev->firmware, buf, count);
-
- if (mdev->firmware[count - 1] == '\n')
- mdev->firmware[count - 1] = '\0';
- else
- mdev->firmware[count] = '\0';
-unlock:
- mutex_unlock(&mdev->mic_mutex);
- return count;
-}
-static DEVICE_ATTR_RW(firmware);
-
-static ssize_t
-ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- char *ramdisk;
-
- if (!mdev)
- return -EINVAL;
-
- ramdisk = mdev->ramdisk;
-
- if (ramdisk)
- return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
- return 0;
-}
-
-static ssize_t
-ramdisk_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev)
- return -EINVAL;
-
- mutex_lock(&mdev->mic_mutex);
- kfree(mdev->ramdisk);
-
- mdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
- if (!mdev->ramdisk) {
- count = -ENOMEM;
- goto unlock;
- }
-
- strncpy(mdev->ramdisk, buf, count);
-
- if (mdev->ramdisk[count - 1] == '\n')
- mdev->ramdisk[count - 1] = '\0';
- else
- mdev->ramdisk[count] = '\0';
-unlock:
- mutex_unlock(&mdev->mic_mutex);
- return count;
-}
-static DEVICE_ATTR_RW(ramdisk);
-
-static ssize_t
-bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- char *bootmode;
-
- if (!mdev)
- return -EINVAL;
-
- bootmode = mdev->bootmode;
-
- if (bootmode)
- return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
- return 0;
-}
-
-static ssize_t
-bootmode_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev)
- return -EINVAL;
-
- if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "elf"))
- return -EINVAL;
-
- mutex_lock(&mdev->mic_mutex);
- kfree(mdev->bootmode);
-
- mdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
- if (!mdev->bootmode) {
- count = -ENOMEM;
- goto unlock;
- }
-
- strncpy(mdev->bootmode, buf, count);
-
- if (mdev->bootmode[count - 1] == '\n')
- mdev->bootmode[count - 1] = '\0';
- else
- mdev->bootmode[count] = '\0';
-unlock:
- mutex_unlock(&mdev->mic_mutex);
- return count;
-}
-static DEVICE_ATTR_RW(bootmode);
-
-static ssize_t
-log_buf_addr_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev)
- return -EINVAL;
-
- return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_addr);
-}
-
-static ssize_t
-log_buf_addr_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- int ret;
- unsigned long addr;
-
- if (!mdev)
- return -EINVAL;
-
- ret = kstrtoul(buf, 16, &addr);
- if (ret)
- goto exit;
-
- mdev->log_buf_addr = (void *)addr;
- ret = count;
-exit:
- return ret;
-}
-static DEVICE_ATTR_RW(log_buf_addr);
-
-static ssize_t
-log_buf_len_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
- if (!mdev)
- return -EINVAL;
-
- return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_len);
-}
-
-static ssize_t
-log_buf_len_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct mic_device *mdev = dev_get_drvdata(dev->parent);
- int ret;
- unsigned long addr;
-
- if (!mdev)
- return -EINVAL;
-
- ret = kstrtoul(buf, 16, &addr);
- if (ret)
- goto exit;
-
- mdev->log_buf_len = (int *)addr;
- ret = count;
-exit:
- return ret;
-}
-static DEVICE_ATTR_RW(log_buf_len);
-
-static struct attribute *mic_default_attrs[] = {
- &dev_attr_family.attr,
- &dev_attr_stepping.attr,
- &dev_attr_state.attr,
- &dev_attr_shutdown_status.attr,
- &dev_attr_cmdline.attr,
- &dev_attr_firmware.attr,
- &dev_attr_ramdisk.attr,
- &dev_attr_bootmode.attr,
- &dev_attr_log_buf_addr.attr,
- &dev_attr_log_buf_len.attr,
-
- NULL
-};
-
-ATTRIBUTE_GROUPS(mic_default);
-
-void mic_sysfs_init(struct mic_device *mdev)
-{
- mdev->attr_group = mic_default_groups;
-}
diff --git a/drivers/misc/mic/host/mic_virtio.c b/drivers/misc/mic/host/mic_virtio.c
index cc08e9f733c9..58b107a24a8b 100644
--- a/drivers/misc/mic/host/mic_virtio.c
+++ b/drivers/misc/mic/host/mic_virtio.c
@@ -23,7 +23,6 @@
#include <linux/uaccess.h>
#include <linux/dmaengine.h>
#include <linux/mic_common.h>
-
#include "../common/mic_dev.h"
#include "mic_device.h"
#include "mic_smpt.h"
@@ -62,7 +61,7 @@ static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst,
}
error:
if (err)
- dev_err(mdev->sdev->parent, "%s %d err %d\n",
+ dev_err(&mdev->pdev->dev, "%s %d err %d\n",
__func__, __LINE__, err);
return err;
}
@@ -440,7 +439,7 @@ void mic_virtio_reset_devices(struct mic_device *mdev)
struct list_head *pos, *tmp;
struct mic_vdev *mvdev;
- dev_dbg(mdev->sdev->parent, "%s\n", __func__);
+ dev_dbg(&mdev->pdev->dev, "%s\n", __func__);
list_for_each_safe(pos, tmp, &mdev->vdev_list) {
mvdev = list_entry(pos, struct mic_vdev, list);
@@ -686,7 +685,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
mvr->head = USHRT_MAX;
mvr->mvdev = mvdev;
mvr->vrh.notify = mic_notify;
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"%s %d index %d va %p info %p vr_size 0x%x\n",
__func__, __LINE__, i, vr->va, vr->info, vr_size);
mvr->buf = (void *)__get_free_pages(GFP_KERNEL,
@@ -704,7 +703,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
mvdev->virtio_db, MIC_INTR_DB);
if (IS_ERR(mvdev->virtio_cookie)) {
ret = PTR_ERR(mvdev->virtio_cookie);
- dev_dbg(mdev->sdev->parent, "request irq failed\n");
+ dev_dbg(&mdev->pdev->dev, "request irq failed\n");
goto err;
}
@@ -720,7 +719,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
smp_wmb();
dd->type = type;
- dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type);
+ dev_dbg(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type);
db = bootparam->h2c_config_db;
if (db != -1)
@@ -755,7 +754,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev)
db = bootparam->h2c_config_db;
if (db == -1)
goto skip_hot_remove;
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"Requesting hot remove id %d\n", mvdev->virtio_id);
mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
mdev->ops->send_intr(mdev, db);
@@ -765,7 +764,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev)
if (ret)
break;
}
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"Device id %d config_change %d guest_ack %d retry %d\n",
mvdev->virtio_id, mvdev->dc->config_change,
mvdev->dc->guest_ack, retry);
@@ -794,7 +793,7 @@ skip_hot_remove:
tmp_mvdev = list_entry(pos, struct mic_vdev, list);
if (tmp_mvdev == mvdev) {
list_del(pos);
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"Removing virtio device id %d\n",
mvdev->virtio_id);
break;
diff --git a/drivers/misc/mic/host/mic_virtio.h b/drivers/misc/mic/host/mic_virtio.h
index d574efb853d9..a80631f2790d 100644
--- a/drivers/misc/mic/host/mic_virtio.h
+++ b/drivers/misc/mic/host/mic_virtio.h
@@ -124,7 +124,7 @@ void mic_bh_handler(struct work_struct *work);
/* Helper API to obtain the MIC PCIe device */
static inline struct device *mic_dev(struct mic_vdev *mvdev)
{
- return mvdev->mdev->sdev->parent;
+ return &mvdev->mdev->pdev->dev;
}
/* Helper API to check if a virtio device is initialized */
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
index 3341e90dede4..8118ac48c764 100644
--- a/drivers/misc/mic/host/mic_x100.c
+++ b/drivers/misc/mic/host/mic_x100.c
@@ -43,7 +43,7 @@
static void
mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
{
- dev_dbg(mdev->sdev->parent, "Writing 0x%x to scratch pad index %d\n",
+ dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n",
val, idx);
mic_mmio_write(&mdev->mmio, val,
MIC_X100_SBOX_BASE_ADDRESS +
@@ -66,7 +66,7 @@ mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
MIC_X100_SBOX_BASE_ADDRESS +
MIC_X100_SBOX_SPAD0 + idx * 4);
- dev_dbg(mdev->sdev->parent,
+ dev_dbg(&mdev->pdev->dev,
"Reading 0x%x from scratch pad index %d\n", val, idx);
return val;
}
@@ -126,7 +126,7 @@ static void mic_x100_disable_interrupts(struct mic_device *mdev)
* @mdev: pointer to mic_device instance
*/
static void mic_x100_send_sbox_intr(struct mic_device *mdev,
- int doorbell)
+ int doorbell)
{
struct mic_mw *mw = &mdev->mmio;
u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
@@ -147,7 +147,7 @@ static void mic_x100_send_sbox_intr(struct mic_device *mdev,
* @mdev: pointer to mic_device instance
*/
static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
- int doorbell)
+ int doorbell)
{
int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
/* Ensure that the interrupt is ordered w.r.t. previous stores. */
@@ -359,15 +359,14 @@ mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
boot_mem = mdev->aper.len >> 20;
buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL);
- if (!buf) {
- dev_err(mdev->sdev->parent,
- "%s %d allocation failed\n", __func__, __LINE__);
+ if (!buf)
return -ENOMEM;
- }
+
len += snprintf(buf, CMDLINE_SIZE - len,
" mem=%dM", boot_mem);
- if (mdev->cmdline)
- snprintf(buf + len, CMDLINE_SIZE - len, " %s", mdev->cmdline);
+ if (mdev->cosm_dev->cmdline)
+ snprintf(buf + len, CMDLINE_SIZE - len, " %s",
+ mdev->cosm_dev->cmdline);
memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
kfree(buf);
return 0;
@@ -386,12 +385,11 @@ mic_x100_load_ramdisk(struct mic_device *mdev)
int rc;
struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr;
- rc = request_firmware(&fw,
- mdev->ramdisk, mdev->sdev->parent);
+ rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev);
if (rc < 0) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"ramdisk request_firmware failed: %d %s\n",
- rc, mdev->ramdisk);
+ rc, mdev->cosm_dev->ramdisk);
goto error;
}
/*
@@ -423,10 +421,10 @@ mic_x100_get_boot_addr(struct mic_device *mdev)
scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
- dev_dbg(mdev->sdev->parent, "%s %d boot_addr 0x%x\n",
+ dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n",
__func__, __LINE__, boot_addr);
if (boot_addr > (1 << 31)) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"incorrect bootaddr 0x%x\n",
boot_addr);
rc = -EINVAL;
@@ -454,37 +452,37 @@ mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
if (rc)
goto error;
/* load OS */
- rc = request_firmware(&fw, mdev->firmware, mdev->sdev->parent);
+ rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev);
if (rc < 0) {
- dev_err(mdev->sdev->parent,
+ dev_err(&mdev->pdev->dev,
"ramdisk request_firmware failed: %d %s\n",
- rc, mdev->firmware);
+ rc, mdev->cosm_dev->firmware);
goto error;
}
if (mdev->bootaddr > mdev->aper.len - fw->size) {
rc = -EINVAL;
- dev_err(mdev->sdev->parent, "%s %d rc %d bootaddr 0x%x\n",
+ dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n",
__func__, __LINE__, rc, mdev->bootaddr);
release_firmware(fw);
goto error;
}
memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
- if (!strcmp(mdev->bootmode, "elf"))
+ if (!strcmp(mdev->cosm_dev->bootmode, "flash"))
goto done;
/* load command line */
rc = mic_x100_load_command_line(mdev, fw);
if (rc) {
- dev_err(mdev->sdev->parent, "%s %d rc %d\n",
+ dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
__func__, __LINE__, rc);
goto error;
}
release_firmware(fw);
/* load ramdisk */
- if (mdev->ramdisk)
+ if (mdev->cosm_dev->ramdisk)
rc = mic_x100_load_ramdisk(mdev);
error:
- dev_dbg(mdev->sdev->parent, "%s %d rc %d\n", __func__, __LINE__, rc);
+ dev_dbg(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc);
done:
return rc;
}
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
index bf10bb7e2b91..29cfc3e51ac9 100644
--- a/drivers/misc/mic/scif/Makefile
+++ b/drivers/misc/mic/scif/Makefile
@@ -13,3 +13,8 @@ scif-objs += scif_epd.o
scif-objs += scif_rb.o
scif-objs += scif_nodeqp.o
scif-objs += scif_nm.o
+scif-objs += scif_dma.o
+scif-objs += scif_fence.o
+scif-objs += scif_mmap.o
+scif-objs += scif_rma.o
+scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
index f39d3135a9ef..ddc9e4b08b5c 100644
--- a/drivers/misc/mic/scif/scif_api.c
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -37,9 +37,21 @@ enum conn_async_state {
ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
};
+/*
+ * File operations for anonymous inode file associated with a SCIF endpoint,
+ * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
+ * poll API in the kernel and these take in a struct file *. Since a struct
+ * file is not available to kernel mode SCIF, it uses an anonymous file for
+ * this purpose.
+ */
+const struct file_operations scif_anon_fops = {
+ .owner = THIS_MODULE,
+};
+
scif_epd_t scif_open(void)
{
struct scif_endpt *ep;
+ int err;
might_sleep();
ep = kzalloc(sizeof(*ep), GFP_KERNEL);
@@ -50,15 +62,22 @@ scif_epd_t scif_open(void)
if (!ep->qp_info.qp)
goto err_qp_alloc;
+ err = scif_anon_inode_getfile(ep);
+ if (err)
+ goto err_anon_inode;
+
spin_lock_init(&ep->lock);
mutex_init(&ep->sendlock);
mutex_init(&ep->recvlock);
+ scif_rma_ep_init(ep);
ep->state = SCIFEP_UNBOUND;
dev_dbg(scif_info.mdev.this_device,
"SCIFAPI open: ep %p success\n", ep);
return ep;
+err_anon_inode:
+ kfree(ep->qp_info.qp);
err_qp_alloc:
kfree(ep);
err_ep_alloc:
@@ -166,8 +185,11 @@ int scif_close(scif_epd_t epd)
switch (oldstate) {
case SCIFEP_ZOMBIE:
+ dev_err(scif_info.mdev.this_device,
+ "SCIFAPI close: zombie state unexpected\n");
case SCIFEP_DISCONNECTED:
spin_unlock(&ep->lock);
+ scif_unregister_all_windows(epd);
/* Remove from the disconnected list */
mutex_lock(&scif_info.connlock);
list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
@@ -189,6 +211,7 @@ int scif_close(scif_epd_t epd)
case SCIFEP_CLOSING:
{
spin_unlock(&ep->lock);
+ scif_unregister_all_windows(epd);
scif_disconnect_ep(ep);
break;
}
@@ -200,7 +223,7 @@ int scif_close(scif_epd_t epd)
struct scif_endpt *aep;
spin_unlock(&ep->lock);
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
/* remove from listen list */
list_for_each_safe(pos, tmpq, &scif_info.listen) {
@@ -222,7 +245,7 @@ int scif_close(scif_epd_t epd)
break;
}
}
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
mutex_lock(&scif_info.connlock);
list_for_each_safe(pos, tmpq, &scif_info.connected) {
tmpep = list_entry(pos,
@@ -242,13 +265,13 @@ int scif_close(scif_epd_t epd)
}
mutex_unlock(&scif_info.connlock);
scif_teardown_ep(aep);
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
ep->acceptcnt--;
}
spin_lock(&ep->lock);
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
/* Remove and reject any pending connection requests. */
while (ep->conreqcnt) {
@@ -279,6 +302,7 @@ int scif_close(scif_epd_t epd)
}
}
scif_put_port(ep->port.port);
+ scif_anon_inode_fput(ep);
scif_teardown_ep(ep);
scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
return 0;
@@ -409,9 +433,9 @@ int scif_listen(scif_epd_t epd, int backlog)
scif_teardown_ep(ep);
ep->qp_info.qp = NULL;
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
list_add_tail(&ep->list, &scif_info.listen);
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
@@ -450,6 +474,13 @@ static int scif_conn_func(struct scif_endpt *ep)
struct scifmsg msg;
struct device *spdev;
+ err = scif_reserve_dma_chan(ep);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ ep->state = SCIFEP_BOUND;
+ goto connect_error_simple;
+ }
/* Initiate the first part of the endpoint QP setup */
err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
SCIF_ENDPT_QP_SIZE, ep->remote_dev);
@@ -558,8 +589,10 @@ void scif_conn_handler(struct work_struct *work)
list_del(&ep->conn_list);
}
spin_unlock(&scif_info.nb_connect_lock);
- if (ep)
+ if (ep) {
ep->conn_err = scif_conn_func(ep);
+ wake_up_interruptible(&ep->conn_pend_wq);
+ }
} while (ep);
}
@@ -660,6 +693,7 @@ int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
ep->remote_dev = &scif_dev[dst->node];
ep->qp_info.qp->magic = SCIFEP_MAGIC;
if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ init_waitqueue_head(&ep->conn_pend_wq);
spin_lock(&scif_info.nb_connect_lock);
list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
spin_unlock(&scif_info.nb_connect_lock);
@@ -782,12 +816,25 @@ retry_connection:
cep->remote_dev = &scif_dev[peer->node];
cep->remote_ep = conreq->msg.payload[0];
+ scif_rma_ep_init(cep);
+
+ err = scif_reserve_dma_chan(cep);
+ if (err) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto scif_accept_error_qpalloc;
+ }
+
cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
if (!cep->qp_info.qp) {
err = -ENOMEM;
goto scif_accept_error_qpalloc;
}
+ err = scif_anon_inode_getfile(cep);
+ if (err)
+ goto scif_accept_error_anon_inode;
+
cep->qp_info.qp->magic = SCIFEP_MAGIC;
spdev = scif_get_peer_dev(cep->remote_dev);
if (IS_ERR(spdev)) {
@@ -858,6 +905,8 @@ retry:
spin_unlock(&cep->lock);
return 0;
scif_accept_error_map:
+ scif_anon_inode_fput(cep);
+scif_accept_error_anon_inode:
scif_teardown_ep(cep);
scif_accept_error_qpalloc:
kfree(cep);
@@ -1247,6 +1296,134 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
}
EXPORT_SYMBOL_GPL(scif_recv);
+static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
+ poll_table *p, struct scif_endpt *ep)
+{
+ /*
+ * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
+ * and regrab it afterwards. Because the endpoint state might have
+ * changed while the lock was given up, the state must be checked
+ * again after re-acquiring the lock. The code in __scif_pollfd(..)
+ * does this.
+ */
+ spin_unlock(&ep->lock);
+ poll_wait(f, wq, p);
+ spin_lock(&ep->lock);
+}
+
+unsigned int
+__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
+{
+ unsigned int mask = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+ spin_lock(&ep->lock);
+
+ /* Endpoint is waiting for a non-blocking connect to complete */
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
+ if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+ if (ep->state == SCIFEP_CONNECTED ||
+ ep->state == SCIFEP_DISCONNECTED ||
+ ep->conn_err)
+ mask |= POLLOUT;
+ goto exit;
+ }
+ }
+
+ /* Endpoint is listening for incoming connection requests */
+ if (ep->state == SCIFEP_LISTENING) {
+ _scif_poll_wait(f, &ep->conwq, wait, ep);
+ if (ep->state == SCIFEP_LISTENING) {
+ if (ep->conreqcnt)
+ mask |= POLLIN;
+ goto exit;
+ }
+ }
+
+ /* Endpoint is connected or disconnected */
+ if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
+ if (poll_requested_events(wait) & POLLIN)
+ _scif_poll_wait(f, &ep->recvwq, wait, ep);
+ if (poll_requested_events(wait) & POLLOUT)
+ _scif_poll_wait(f, &ep->sendwq, wait, ep);
+ if (ep->state == SCIFEP_CONNECTED ||
+ ep->state == SCIFEP_DISCONNECTED) {
+ /* Data can be read without blocking */
+ if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
+ mask |= POLLIN;
+ /* Data can be written without blocking */
+ if (scif_rb_space(&ep->qp_info.qp->outbound_q))
+ mask |= POLLOUT;
+ /* Return POLLHUP if endpoint is disconnected */
+ if (ep->state == SCIFEP_DISCONNECTED)
+ mask |= POLLHUP;
+ goto exit;
+ }
+ }
+
+ /* Return POLLERR if the endpoint is in none of the above states */
+ mask |= POLLERR;
+exit:
+ spin_unlock(&ep->lock);
+ return mask;
+}
+
+/**
+ * scif_poll() - Kernel mode SCIF poll
+ * @ufds: Array of scif_pollepd structures containing the end points
+ * and events to poll on
+ * @nfds: Size of the ufds array
+ * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
+ *
+ * The code flow in this function is based on do_poll(..) in select.c
+ *
+ * Returns the number of endpoints which have pending events or 0 in
+ * the event of a timeout. If a signal is used for wake up, -EINTR is
+ * returned.
+ */
+int
+scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
+{
+ struct poll_wqueues table;
+ poll_table *pt;
+ int i, mask, count = 0, timed_out = timeout_msecs == 0;
+ u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
+ : msecs_to_jiffies(timeout_msecs);
+
+ poll_initwait(&table);
+ pt = &table.pt;
+ while (1) {
+ for (i = 0; i < nfds; i++) {
+ pt->_key = ufds[i].events | POLLERR | POLLHUP;
+ mask = __scif_pollfd(ufds[i].epd->anon,
+ pt, ufds[i].epd);
+ mask &= ufds[i].events | POLLERR | POLLHUP;
+ if (mask) {
+ count++;
+ pt->_qproc = NULL;
+ }
+ ufds[i].revents = mask;
+ }
+ pt->_qproc = NULL;
+ if (!count) {
+ count = table.error;
+ if (signal_pending(current))
+ count = -EINTR;
+ }
+ if (count || timed_out)
+ break;
+
+ if (!schedule_timeout_interruptible(timeout))
+ timed_out = 1;
+ }
+ poll_freewait(&table);
+ return count;
+}
+EXPORT_SYMBOL_GPL(scif_poll);
+
int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
int online = 0;
@@ -1274,3 +1451,46 @@ int scif_get_node_ids(u16 *nodes, int len, u16 *self)
return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);
+
+static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
+{
+ struct scif_client *client =
+ container_of(si, struct scif_client, si);
+ struct scif_peer_dev *spdev =
+ container_of(dev, struct scif_peer_dev, dev);
+
+ if (client->probe)
+ client->probe(spdev);
+ return 0;
+}
+
+static void scif_remove_client_dev(struct device *dev,
+ struct subsys_interface *si)
+{
+ struct scif_client *client =
+ container_of(si, struct scif_client, si);
+ struct scif_peer_dev *spdev =
+ container_of(dev, struct scif_peer_dev, dev);
+
+ if (client->remove)
+ client->remove(spdev);
+}
+
+void scif_client_unregister(struct scif_client *client)
+{
+ subsys_interface_unregister(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_unregister);
+
+int scif_client_register(struct scif_client *client)
+{
+ struct subsys_interface *si = &client->si;
+
+ si->name = client->name;
+ si->subsys = &scif_peer_bus;
+ si->add_dev = scif_add_client_dev;
+ si->remove_dev = scif_remove_client_dev;
+
+ return subsys_interface_register(&client->si);
+}
+EXPORT_SYMBOL_GPL(scif_client_register);
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
index 51f14e2a1196..6884dad97e17 100644
--- a/drivers/misc/mic/scif/scif_debugfs.c
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -62,10 +62,87 @@ static const struct file_operations scif_dev_ops = {
.release = scif_dev_test_release
};
-void __init scif_init_debugfs(void)
+static void scif_display_window(struct scif_window *window, struct seq_file *s)
+{
+ int j;
+ struct scatterlist *sg;
+ scif_pinned_pages_t pin = window->pinned_pages;
+
+ seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
+ window, window->type, window->temp, window->offset);
+ seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
+ window->nr_pages, window->nr_contig_chunks, window->prot);
+ seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
+ window->ref_count, window->magic, window->peer_window);
+ seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
+ window->unreg_state, window->va_for_temp);
+
+ for (j = 0; j < window->nr_contig_chunks; j++)
+ seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
+ window->dma_addr[j], window->num_pages[j]);
+
+ if (window->type == SCIF_WINDOW_SELF && pin)
+ for (j = 0; j < window->nr_pages; j++)
+ seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
+ j, pin->pages[j],
+ page_address(pin->pages[j]));
+
+ if (window->st)
+ for_each_sg(window->st->sgl, sg, window->st->nents, j)
+ seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
+ j, sg_dma_address(sg), sg_dma_len(sg));
+}
+
+static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
{
- struct dentry *d;
+ struct list_head *item;
+ struct scif_window *window;
+ list_for_each(item, head) {
+ window = list_entry(item, struct scif_window, list);
+ scif_display_window(window, s);
+ }
+}
+
+static int scif_rma_test(struct seq_file *s, void *unused)
+{
+ struct scif_endpt *ep;
+ struct list_head *pos;
+
+ mutex_lock(&scif_info.connlock);
+ list_for_each(pos, &scif_info.connected) {
+ ep = list_entry(pos, struct scif_endpt, list);
+ seq_printf(s, "ep %p self windows\n", ep);
+ mutex_lock(&ep->rma_info.rma_lock);
+ scif_display_all_windows(&ep->rma_info.reg_list, s);
+ seq_printf(s, "ep %p remote windows\n", ep);
+ scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ }
+ mutex_unlock(&scif_info.connlock);
+ return 0;
+}
+
+static int scif_rma_test_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, scif_rma_test, inode->i_private);
+}
+
+static int scif_rma_test_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations scif_rma_ops = {
+ .owner = THIS_MODULE,
+ .open = scif_rma_test_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = scif_rma_test_release
+};
+
+void __init scif_init_debugfs(void)
+{
scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
if (!scif_dbg) {
dev_err(scif_info.mdev.this_device,
@@ -73,8 +150,8 @@ void __init scif_init_debugfs(void)
return;
}
- d = debugfs_create_file("scif_dev", 0444, scif_dbg,
- NULL, &scif_dev_ops);
+ debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
+ debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops);
debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
}
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
new file mode 100644
index 000000000000..95a13c629a8e
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -0,0 +1,1979 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * struct scif_dma_comp_cb - SCIF DMA completion callback
+ *
+ * @dma_completion_func: DMA completion callback
+ * @cb_cookie: DMA completion callback cookie
+ * @temp_buf: Temporary buffer
+ * @temp_buf_to_free: Temporary buffer to be freed
+ * @is_cache: Is a kmem_cache allocated buffer
+ * @dst_offset: Destination registration offset
+ * @dst_window: Destination registration window
+ * @len: Length of the temp buffer
+ * @temp_phys: DMA address of the temp buffer
+ * @sdev: The SCIF device
+ * @header_padding: padding for cache line alignment
+ */
+struct scif_dma_comp_cb {
+ void (*dma_completion_func)(void *cookie);
+ void *cb_cookie;
+ u8 *temp_buf;
+ u8 *temp_buf_to_free;
+ bool is_cache;
+ s64 dst_offset;
+ struct scif_window *dst_window;
+ size_t len;
+ dma_addr_t temp_phys;
+ struct scif_dev *sdev;
+ int header_padding;
+};
+
+/**
+ * struct scif_copy_work - Work for DMA copy
+ *
+ * @src_offset: Starting source offset
+ * @dst_offset: Starting destination offset
+ * @src_window: Starting src registered window
+ * @dst_window: Starting dst registered window
+ * @loopback: true if this is a loopback DMA transfer
+ * @len: Length of the transfer
+ * @comp_cb: DMA copy completion callback
+ * @remote_dev: The remote SCIF peer device
+ * @fence_type: polling or interrupt based
+ * @ordered: is this a tail byte ordered DMA transfer
+ */
+struct scif_copy_work {
+ s64 src_offset;
+ s64 dst_offset;
+ struct scif_window *src_window;
+ struct scif_window *dst_window;
+ int loopback;
+ size_t len;
+ struct scif_dma_comp_cb *comp_cb;
+ struct scif_dev *remote_dev;
+ int fence_type;
+ bool ordered;
+};
+
+#ifndef list_entry_next
+#define list_entry_next(pos, member) \
+ list_entry(pos->member.next, typeof(*pos), member)
+#endif
+
+/**
+ * scif_reserve_dma_chan:
+ * @ep: Endpoint Descriptor.
+ *
+ * This routine reserves a DMA channel for a particular
+ * endpoint. All DMA transfers for an endpoint are always
+ * programmed on the same DMA channel.
+ */
+int scif_reserve_dma_chan(struct scif_endpt *ep)
+{
+ int err = 0;
+ struct scif_dev *scifdev;
+ struct scif_hw_dev *sdev;
+ struct dma_chan *chan;
+
+ /* Loopback DMAs are not supported on the management node */
+ if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
+ return 0;
+ if (scif_info.nodeid)
+ scifdev = &scif_dev[0];
+ else
+ scifdev = ep->remote_dev;
+ sdev = scifdev->sdev;
+ if (!sdev->num_dma_ch)
+ return -ENODEV;
+ chan = sdev->dma_ch[scifdev->dma_ch_idx];
+ scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
+ mutex_lock(&ep->rma_info.rma_lock);
+ ep->rma_info.dma_chan = chan;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ return err;
+}
+
+#ifdef CONFIG_MMU_NOTIFIER
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached windows
+ */
+static
+void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
+ struct scif_endpt *ep,
+ u64 start, u64 len)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ u64 start_va, end_va;
+ u64 end = start + len;
+
+ if (end <= start)
+ return;
+
+ list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
+ window = list_entry(item, struct scif_window, list);
+ ep = (struct scif_endpt *)window->ep;
+ if (!len)
+ break;
+ start_va = window->va_for_temp;
+ end_va = start_va + (window->nr_pages << PAGE_SHIFT);
+ if (start < start_va && end <= start_va)
+ break;
+ if (start >= end_va)
+ continue;
+ __scif_rma_destroy_tcw_helper(window);
+ }
+}
+
+static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
+{
+ struct scif_endpt *ep = mmn->ep;
+
+ spin_lock(&ep->rma_info.tc_lock);
+ __scif_rma_destroy_tcw(mmn, ep, start, len);
+ spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+ struct list_head *item, *tmp;
+ struct scif_mmu_notif *mmn;
+
+ list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+ }
+}
+
+static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+ struct list_head *item, *tmp;
+ struct scif_mmu_notif *mmn;
+
+ spin_lock(&ep->rma_info.tc_lock);
+ list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ __scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
+ }
+ spin_unlock(&ep->rma_info.tc_lock);
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+ if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
+ return false;
+ if ((atomic_read(&ep->rma_info.tcw_total_pages)
+ + (cur_bytes >> PAGE_SHIFT)) >
+ scif_info.rma_tc_limit) {
+ dev_info(scif_info.mdev.this_device,
+ "%s %d total=%d, current=%zu reached max\n",
+ __func__, __LINE__,
+ atomic_read(&ep->rma_info.tcw_total_pages),
+ (1 + (cur_bytes >> PAGE_SHIFT)));
+ scif_rma_destroy_tcw_invalid();
+ __scif_rma_destroy_tcw_ep(ep);
+ }
+ return true;
+}
+
+static void scif_mmu_notifier_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct scif_mmu_notif *mmn;
+
+ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+ scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+ schedule_work(&scif_info.misc_work);
+}
+
+static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address)
+{
+ struct scif_mmu_notif *mmn;
+
+ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+ scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
+}
+
+static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct scif_mmu_notif *mmn;
+
+ mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+ scif_rma_destroy_tcw(mmn, start, end - start);
+}
+
+static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ /*
+ * Nothing to do here, everything needed was done in
+ * invalidate_range_start.
+ */
+}
+
+static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
+ .release = scif_mmu_notifier_release,
+ .clear_flush_young = NULL,
+ .invalidate_page = scif_mmu_notifier_invalidate_page,
+ .invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
+ .invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
+
+static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
+{
+ struct scif_endpt_rma_info *rma = &ep->rma_info;
+ struct scif_mmu_notif *mmn = NULL;
+ struct list_head *item, *tmp;
+
+ mutex_lock(&ep->rma_info.mmn_lock);
+ list_for_each_safe(item, tmp, &rma->mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
+ list_del(item);
+ kfree(mmn);
+ }
+ mutex_unlock(&ep->rma_info.mmn_lock);
+}
+
+static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
+ struct mm_struct *mm, struct scif_endpt *ep)
+{
+ mmn->ep = ep;
+ mmn->mm = mm;
+ mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
+ INIT_LIST_HEAD(&mmn->list);
+ INIT_LIST_HEAD(&mmn->tc_reg_list);
+}
+
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
+{
+ struct scif_mmu_notif *mmn;
+ struct list_head *item;
+
+ list_for_each(item, &rma->mmn_list) {
+ mmn = list_entry(item, struct scif_mmu_notif, list);
+ if (mmn->mm == mm)
+ return mmn;
+ }
+ return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+ struct scif_mmu_notif *mmn
+ = kzalloc(sizeof(*mmn), GFP_KERNEL);
+
+ if (!mmn)
+ return ERR_PTR(ENOMEM);
+
+ scif_init_mmu_notifier(mmn, current->mm, ep);
+ if (mmu_notifier_register(&mmn->ep_mmu_notifier,
+ current->mm)) {
+ kfree(mmn);
+ return ERR_PTR(EBUSY);
+ }
+ list_add(&mmn->list, &ep->rma_info.mmn_list);
+ return mmn;
+}
+
+/*
+ * Called from the misc thread to destroy temporary cached windows and
+ * unregister the MMU notifier for the SCIF endpoint.
+ */
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+ struct list_head *pos, *tmpq;
+ struct scif_endpt *ep;
+restart:
+ scif_rma_destroy_tcw_invalid();
+ spin_lock(&scif_info.rmalock);
+ list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
+ ep = list_entry(pos, struct scif_endpt, mmu_list);
+ list_del(&ep->mmu_list);
+ spin_unlock(&scif_info.rmalock);
+ scif_rma_destroy_tcw_ep(ep);
+ scif_ep_unregister_mmu_notifier(ep);
+ goto restart;
+ }
+ spin_unlock(&scif_info.rmalock);
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+ return !!(flags & SCIF_RMA_USECACHE);
+}
+#else
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm,
+ struct scif_endpt_rma_info *rma)
+{
+ return NULL;
+}
+
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+ return NULL;
+}
+
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+}
+
+static bool scif_is_set_reg_cache(int flags)
+{
+ return false;
+}
+
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+ return false;
+}
+#endif
+
+/**
+ * scif_register_temp:
+ * @epd: End Point Descriptor.
+ * @addr: virtual address to/from which to copy
+ * @len: length of range to copy
+ * @out_offset: computed offset returned by reference.
+ * @out_window: allocated registered window returned by reference.
+ *
+ * Create a temporary registered window. The peer will not know about this
+ * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
+ */
+static int
+scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
+ off_t *out_offset, struct scif_window **out_window)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err;
+ scif_pinned_pages_t pinned_pages;
+ size_t aligned_len;
+
+ aligned_len = ALIGN(len, PAGE_SIZE);
+
+ err = __scif_pin_pages((void *)(addr & PAGE_MASK),
+ aligned_len, &prot, 0, &pinned_pages);
+ if (err)
+ return err;
+
+ pinned_pages->prot = prot;
+
+ /* Compute the offset for this registration */
+ err = scif_get_window_offset(ep, 0, 0,
+ aligned_len >> PAGE_SHIFT,
+ (s64 *)out_offset);
+ if (err)
+ goto error_unpin;
+
+ /* Allocate and prepare self registration window */
+ *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
+ *out_offset, true);
+ if (!*out_window) {
+ scif_free_window_offset(ep, NULL, *out_offset);
+ err = -ENOMEM;
+ goto error_unpin;
+ }
+
+ (*out_window)->pinned_pages = pinned_pages;
+ (*out_window)->nr_pages = pinned_pages->nr_pages;
+ (*out_window)->prot = pinned_pages->prot;
+
+ (*out_window)->va_for_temp = addr & PAGE_MASK;
+ err = scif_map_window(ep->remote_dev, *out_window);
+ if (err) {
+ /* Something went wrong! Rollback */
+ scif_destroy_window(ep, *out_window);
+ *out_window = NULL;
+ } else {
+ *out_offset |= (addr - (*out_window)->va_for_temp);
+ }
+ return err;
+error_unpin:
+ if (err)
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ scif_unpin_pages(pinned_pages);
+ return err;
+}
+
+#define SCIF_DMA_TO (3 * HZ)
+
+/*
+ * scif_sync_dma - Program a DMA without an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * @sync_wait: Wait for DMA to complete?
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
+ bool sync_wait)
+{
+ int err = 0;
+ struct dma_async_tx_descriptor *tx = NULL;
+ enum dma_ctrl_flags flags = DMA_PREP_FENCE;
+ dma_cookie_t cookie;
+ struct dma_device *ddev;
+
+ if (!chan) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ ddev = chan->device;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ if (!sync_wait) {
+ dma_async_issue_pending(chan);
+ } else {
+ if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
+ err = 0;
+ } else {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ }
+ }
+release:
+ return err;
+}
+
+static void scif_dma_callback(void *arg)
+{
+ struct completion *done = (struct completion *)arg;
+
+ complete(done);
+}
+
+#define SCIF_DMA_SYNC_WAIT true
+#define SCIF_DMA_POLL BIT(0)
+#define SCIF_DMA_INTR BIT(1)
+
+/*
+ * scif_async_dma - Program a DMA with an interrupt descriptor
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @chan - DMA channel to be used.
+ * Return 0 on success and -errno on error.
+ */
+static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+ int err = 0;
+ struct dma_device *ddev;
+ struct dma_async_tx_descriptor *tx = NULL;
+ enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+ DECLARE_COMPLETION_ONSTACK(done_wait);
+ dma_cookie_t cookie;
+ enum dma_status status;
+
+ if (!chan) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ ddev = chan->device;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ reinit_completion(&done_wait);
+ tx->callback = scif_dma_callback;
+ tx->callback_param = &done_wait;
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ err = -ENOMEM;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ dma_async_issue_pending(chan);
+
+ err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
+ if (!err) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+ err = 0;
+ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+ if (status != DMA_COMPLETE) {
+ err = -EIO;
+ dev_err(&sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto release;
+ }
+release:
+ return err;
+}
+
+/*
+ * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
+ * DMA channel via polling.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+ if (!chan)
+ return -EINVAL;
+ return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
+}
+
+/*
+ * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
+ * DMA channel via interrupt based blocking wait.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error.
+ */
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+ if (!chan)
+ return -EINVAL;
+ return scif_async_dma(sdev, chan);
+}
+
+/**
+ * scif_rma_destroy_windows:
+ *
+ * This routine destroys all windows queued for cleanup
+ */
+void scif_rma_destroy_windows(void)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep;
+ struct dma_chan *chan;
+
+ might_sleep();
+restart:
+ spin_lock(&scif_info.rmalock);
+ list_for_each_safe(item, tmp, &scif_info.rma) {
+ window = list_entry(item, struct scif_window,
+ list);
+ ep = (struct scif_endpt *)window->ep;
+ chan = ep->rma_info.dma_chan;
+
+ list_del_init(&window->list);
+ spin_unlock(&scif_info.rmalock);
+ if (!chan || !scifdev_alive(ep) ||
+ !scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan))
+ /* Remove window from global list */
+ window->unreg_state = OP_COMPLETED;
+ else
+ dev_warn(&ep->remote_dev->sdev->dev,
+ "DMA engine hung?\n");
+ if (window->unreg_state == OP_COMPLETED) {
+ if (window->type == SCIF_WINDOW_SELF)
+ scif_destroy_window(ep, window);
+ else
+ scif_destroy_remote_window(window);
+ atomic_dec(&ep->rma_info.tw_refcount);
+ }
+ goto restart;
+ }
+ spin_unlock(&scif_info.rmalock);
+}
+
+/**
+ * scif_rma_destroy_tcw:
+ *
+ * This routine destroys temporary cached registered windows
+ * which have been queued for cleanup.
+ */
+void scif_rma_destroy_tcw_invalid(void)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep;
+ struct dma_chan *chan;
+
+ might_sleep();
+restart:
+ spin_lock(&scif_info.rmalock);
+ list_for_each_safe(item, tmp, &scif_info.rma_tc) {
+ window = list_entry(item, struct scif_window, list);
+ ep = (struct scif_endpt *)window->ep;
+ chan = ep->rma_info.dma_chan;
+ list_del_init(&window->list);
+ spin_unlock(&scif_info.rmalock);
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (!chan || !scifdev_alive(ep) ||
+ !scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan)) {
+ atomic_sub(window->nr_pages,
+ &ep->rma_info.tcw_total_pages);
+ scif_destroy_window(ep, window);
+ atomic_dec(&ep->rma_info.tcw_refcount);
+ } else {
+ dev_warn(&ep->remote_dev->sdev->dev,
+ "DMA engine hung?\n");
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto restart;
+ }
+ spin_unlock(&scif_info.rmalock);
+}
+
+static inline
+void *_get_local_va(off_t off, struct scif_window *window, size_t len)
+{
+ int page_nr = (off - window->offset) >> PAGE_SHIFT;
+ off_t page_off = off & ~PAGE_MASK;
+ void *va = NULL;
+
+ if (window->type == SCIF_WINDOW_SELF) {
+ struct page **pages = window->pinned_pages->pages;
+
+ va = page_address(pages[page_nr]) + page_off;
+ }
+ return va;
+}
+
+static inline
+void *ioremap_remote(off_t off, struct scif_window *window,
+ size_t len, struct scif_dev *dev,
+ struct scif_window_iter *iter)
+{
+ dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
+
+ /*
+ * If the DMA address is not card relative then we need the DMA
+ * addresses to be an offset into the bar. The aperture base was already
+ * added so subtract it here since scif_ioremap is going to add it again
+ */
+ if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+ dev->sdev->aper && !dev->sdev->card_rel_da)
+ phys = phys - dev->sdev->aper->pa;
+ return scif_ioremap(phys, len, dev);
+}
+
+static inline void
+iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
+{
+ scif_iounmap(virt, size, work->remote_dev);
+}
+
+/*
+ * Takes care of ordering issue caused by
+ * 1. Hardware: Only in the case of cpu copy from mgmt node to card
+ * because of WC memory.
+ * 2. Software: If memcpy reorders copy instructions for optimization.
+ * This could happen at both mgmt node and card.
+ */
+static inline void
+scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
+{
+ if (!count)
+ return;
+
+ memcpy_toio((void __iomem __force *)dst, src, --count);
+ /* Order the last byte with the previous stores */
+ wmb();
+ *(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
+ size_t count, bool ordered)
+{
+ if (ordered)
+ scif_ordered_memcpy_toio(dst, src, count);
+ else
+ memcpy_toio((void __iomem __force *)dst, src, count);
+}
+
+static inline
+void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
+{
+ if (!count)
+ return;
+
+ memcpy_fromio(dst, (void __iomem __force *)src, --count);
+ /* Order the last byte with the previous loads */
+ rmb();
+ *(dst + count) = *(src + count);
+}
+
+static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
+ size_t count, bool ordered)
+{
+ if (ordered)
+ scif_ordered_memcpy_fromio(dst, src, count);
+ else
+ memcpy_fromio(dst, (void __iomem __force *)src, count);
+}
+
+#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+/*
+ * scif_off_to_dma_addr:
+ * Obtain the dma_addr given the window and the offset.
+ * @window: Registered window.
+ * @off: Window offset.
+ * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
+ * @index: Return the index of the dma_addr array found.
+ * @start_off: start offset of index of the dma addr array found.
+ * The nr_bytes provides the callee an estimate of the maximum possible
+ * DMA xfer possible while the index/start_off provide faster lookups
+ * for the next iteration.
+ */
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+ size_t *nr_bytes, struct scif_window_iter *iter)
+{
+ int i, page_nr;
+ s64 start, end;
+ off_t page_off;
+
+ if (window->nr_pages == window->nr_contig_chunks) {
+ page_nr = (off - window->offset) >> PAGE_SHIFT;
+ page_off = off & ~PAGE_MASK;
+
+ if (nr_bytes)
+ *nr_bytes = PAGE_SIZE - page_off;
+ return window->dma_addr[page_nr] | page_off;
+ }
+ if (iter) {
+ i = iter->index;
+ start = iter->offset;
+ } else {
+ i = 0;
+ start = window->offset;
+ }
+ for (; i < window->nr_contig_chunks; i++) {
+ end = start + (window->num_pages[i] << PAGE_SHIFT);
+ if (off >= start && off < end) {
+ if (iter) {
+ iter->index = i;
+ iter->offset = start;
+ }
+ if (nr_bytes)
+ *nr_bytes = end - off;
+ return (window->dma_addr[i] + (off - start));
+ }
+ start += (window->num_pages[i] << PAGE_SHIFT);
+ }
+ dev_err(scif_info.mdev.this_device,
+ "%s %d BUG. Addr not found? window %p off 0x%llx\n",
+ __func__, __LINE__, window, off);
+ return SCIF_RMA_ERROR_CODE;
+}
+
+/*
+ * Copy between rma window and temporary buffer
+ */
+static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
+ u8 *temp, size_t rem_len, bool to_temp)
+{
+ void *window_virt;
+ size_t loop_len;
+ int offset_in_page;
+ s64 end_offset;
+
+ offset_in_page = offset & ~PAGE_MASK;
+ loop_len = PAGE_SIZE - offset_in_page;
+
+ if (rem_len < loop_len)
+ loop_len = rem_len;
+
+ window_virt = _get_local_va(offset, window, loop_len);
+ if (!window_virt)
+ return;
+ if (to_temp)
+ memcpy(temp, window_virt, loop_len);
+ else
+ memcpy(window_virt, temp, loop_len);
+
+ offset += loop_len;
+ temp += loop_len;
+ rem_len -= loop_len;
+
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ while (rem_len) {
+ if (offset == end_offset) {
+ window = list_entry_next(window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ }
+ loop_len = min(PAGE_SIZE, rem_len);
+ window_virt = _get_local_va(offset, window, loop_len);
+ if (!window_virt)
+ return;
+ if (to_temp)
+ memcpy(temp, window_virt, loop_len);
+ else
+ memcpy(window_virt, temp, loop_len);
+ offset += loop_len;
+ temp += loop_len;
+ rem_len -= loop_len;
+ }
+}
+
+/**
+ * scif_rma_completion_cb:
+ * @data: RMA cookie
+ *
+ * RMA interrupt completion callback.
+ */
+static void scif_rma_completion_cb(void *data)
+{
+ struct scif_dma_comp_cb *comp_cb = data;
+
+ /* Free DMA Completion CB. */
+ if (comp_cb->dst_window)
+ scif_rma_local_cpu_copy(comp_cb->dst_offset,
+ comp_cb->dst_window,
+ comp_cb->temp_buf +
+ comp_cb->header_padding,
+ comp_cb->len, false);
+ scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
+ SCIF_KMEM_UNALIGNED_BUF_SIZE);
+ if (comp_cb->is_cache)
+ kmem_cache_free(unaligned_cache,
+ comp_cb->temp_buf_to_free);
+ else
+ kfree(comp_cb->temp_buf_to_free);
+}
+
+/* Copies between temporary buffer and offsets provided in work */
+static int
+scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
+ u8 *temp, struct dma_chan *chan,
+ bool src_local)
+{
+ struct scif_dma_comp_cb *comp_cb = work->comp_cb;
+ dma_addr_t window_dma_addr, temp_dma_addr;
+ dma_addr_t temp_phys = comp_cb->temp_phys;
+ size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
+ int offset_in_ca, ret = 0;
+ s64 end_offset, offset;
+ struct scif_window *window;
+ void *window_virt_addr;
+ size_t tail_len;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_device *dev = chan->device;
+ dma_cookie_t cookie;
+
+ if (src_local) {
+ offset = work->dst_offset;
+ window = work->dst_window;
+ } else {
+ offset = work->src_offset;
+ window = work->src_window;
+ }
+
+ offset_in_ca = offset & (L1_CACHE_BYTES - 1);
+ if (offset_in_ca) {
+ loop_len = L1_CACHE_BYTES - offset_in_ca;
+ loop_len = min(loop_len, remaining_len);
+ window_virt_addr = ioremap_remote(offset, window,
+ loop_len,
+ work->remote_dev,
+ NULL);
+ if (!window_virt_addr)
+ return -ENOMEM;
+ if (src_local)
+ scif_unaligned_cpy_toio(window_virt_addr, temp,
+ loop_len,
+ work->ordered &&
+ !(remaining_len - loop_len));
+ else
+ scif_unaligned_cpy_fromio(temp, window_virt_addr,
+ loop_len, work->ordered &&
+ !(remaining_len - loop_len));
+ iounmap_remote(window_virt_addr, loop_len, work);
+
+ offset += loop_len;
+ temp += loop_len;
+ temp_phys += loop_len;
+ remaining_len -= loop_len;
+ }
+
+ offset_in_ca = offset & ~PAGE_MASK;
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+
+ tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+ remaining_len -= tail_len;
+ while (remaining_len) {
+ if (offset == end_offset) {
+ window = list_entry_next(window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ }
+ if (scif_is_mgmt_node())
+ temp_dma_addr = temp_phys;
+ else
+ /* Fix if we ever enable IOMMU on the card */
+ temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
+ window_dma_addr = scif_off_to_dma_addr(window, offset,
+ &nr_contig_bytes,
+ NULL);
+ loop_len = min(nr_contig_bytes, remaining_len);
+ if (src_local) {
+ if (work->ordered && !tail_len &&
+ !(remaining_len - loop_len) &&
+ loop_len != L1_CACHE_BYTES) {
+ /*
+ * Break up the last chunk of the transfer into
+ * two steps. if there is no tail to guarantee
+ * DMA ordering. SCIF_DMA_POLLING inserts
+ * a status update descriptor in step 1 which
+ * acts as a double sided synchronization fence
+ * for the DMA engine to ensure that the last
+ * cache line in step 2 is updated last.
+ */
+ /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+ tx =
+ dev->device_prep_dma_memcpy(chan,
+ window_dma_addr,
+ temp_dma_addr,
+ loop_len -
+ L1_CACHE_BYTES,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ offset += (loop_len - L1_CACHE_BYTES);
+ temp_dma_addr += (loop_len - L1_CACHE_BYTES);
+ window_dma_addr += (loop_len - L1_CACHE_BYTES);
+ remaining_len -= (loop_len - L1_CACHE_BYTES);
+ loop_len = remaining_len;
+
+ /* Step 2) DMA: L1_CACHE_BYTES */
+ tx =
+ dev->device_prep_dma_memcpy(chan,
+ window_dma_addr,
+ temp_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ } else {
+ tx =
+ dev->device_prep_dma_memcpy(chan,
+ window_dma_addr,
+ temp_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ }
+ } else {
+ tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
+ window_dma_addr, loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ }
+ if (ret < 0)
+ goto err;
+ offset += loop_len;
+ temp += loop_len;
+ temp_phys += loop_len;
+ remaining_len -= loop_len;
+ offset_in_ca = 0;
+ }
+ if (tail_len) {
+ if (offset == end_offset) {
+ window = list_entry_next(window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ }
+ window_virt_addr = ioremap_remote(offset, window, tail_len,
+ work->remote_dev,
+ NULL);
+ if (!window_virt_addr)
+ return -ENOMEM;
+ /*
+ * The CPU copy for the tail bytes must be initiated only once
+ * previous DMA transfers for this endpoint have completed
+ * to guarantee ordering.
+ */
+ if (work->ordered) {
+ struct scif_dev *rdev = work->remote_dev;
+
+ ret = scif_drain_dma_intr(rdev->sdev, chan);
+ if (ret)
+ return ret;
+ }
+ if (src_local)
+ scif_unaligned_cpy_toio(window_virt_addr, temp,
+ tail_len, work->ordered);
+ else
+ scif_unaligned_cpy_fromio(temp, window_virt_addr,
+ tail_len, work->ordered);
+ iounmap_remote(window_virt_addr, tail_len, work);
+ }
+ tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ ret = -ENOMEM;
+ return ret;
+ }
+ tx->callback = &scif_rma_completion_cb;
+ tx->callback_param = comp_cb;
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ return ret;
+ }
+ dma_async_issue_pending(chan);
+ return 0;
+err:
+ dev_err(scif_info.mdev.this_device,
+ "%s %d Desc Prog Failed ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+}
+
+/*
+ * _scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+ struct dma_chan *chan)
+{
+ dma_addr_t src_dma_addr, dst_dma_addr;
+ size_t loop_len, remaining_len, src_contig_bytes = 0;
+ size_t dst_contig_bytes = 0;
+ struct scif_window_iter src_win_iter;
+ struct scif_window_iter dst_win_iter;
+ s64 end_src_offset, end_dst_offset;
+ struct scif_window *src_window = work->src_window;
+ struct scif_window *dst_window = work->dst_window;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ int ret = 0;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_device *dev = chan->device;
+ dma_cookie_t cookie;
+
+ remaining_len = work->len;
+
+ scif_init_window_iter(src_window, &src_win_iter);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ while (remaining_len) {
+ if (src_offset == end_src_offset) {
+ src_window = list_entry_next(src_window, list);
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(src_window, &src_win_iter);
+ }
+ if (dst_offset == end_dst_offset) {
+ dst_window = list_entry_next(dst_window, list);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ }
+
+ /* compute dma addresses for transfer */
+ src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+ &src_contig_bytes,
+ &src_win_iter);
+ dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+ &dst_contig_bytes,
+ &dst_win_iter);
+ loop_len = min(src_contig_bytes, dst_contig_bytes);
+ loop_len = min(loop_len, remaining_len);
+ if (work->ordered && !(remaining_len - loop_len)) {
+ /*
+ * Break up the last chunk of the transfer into two
+ * steps to ensure that the last byte in step 2 is
+ * updated last.
+ */
+ /* Step 1) DMA: Body Length - 1 */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len - 1,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ src_offset += (loop_len - 1);
+ dst_offset += (loop_len - 1);
+ src_dma_addr += (loop_len - 1);
+ dst_dma_addr += (loop_len - 1);
+ remaining_len -= (loop_len - 1);
+ loop_len = remaining_len;
+
+ /* Step 2) DMA: 1 BYTES */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr, loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ } else {
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr, loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ }
+ return ret;
+err:
+ dev_err(scif_info.mdev.this_device,
+ "%s %d Desc Prog Failed ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+}
+
+/*
+ * scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ */
+static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+ struct dma_chan *chan)
+{
+ dma_addr_t src_dma_addr, dst_dma_addr;
+ size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
+ size_t dst_contig_bytes = 0;
+ int src_cache_off;
+ s64 end_src_offset, end_dst_offset;
+ struct scif_window_iter src_win_iter;
+ struct scif_window_iter dst_win_iter;
+ void *src_virt, *dst_virt;
+ struct scif_window *src_window = work->src_window;
+ struct scif_window *dst_window = work->dst_window;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ int ret = 0;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_device *dev = chan->device;
+ dma_cookie_t cookie;
+
+ remaining_len = work->len;
+ scif_init_window_iter(src_window, &src_win_iter);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+
+ src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+ if (src_cache_off != 0) {
+ /* Head */
+ loop_len = L1_CACHE_BYTES - src_cache_off;
+ loop_len = min(loop_len, remaining_len);
+ src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+ dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+ if (src_window->type == SCIF_WINDOW_SELF)
+ src_virt = _get_local_va(src_offset, src_window,
+ loop_len);
+ else
+ src_virt = ioremap_remote(src_offset, src_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!src_virt)
+ return -ENOMEM;
+ if (dst_window->type == SCIF_WINDOW_SELF)
+ dst_virt = _get_local_va(dst_offset, dst_window,
+ loop_len);
+ else
+ dst_virt = ioremap_remote(dst_offset, dst_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!dst_virt) {
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+ return -ENOMEM;
+ }
+ if (src_window->type == SCIF_WINDOW_SELF)
+ scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+ remaining_len == loop_len ?
+ work->ordered : false);
+ else
+ scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
+ remaining_len == loop_len ?
+ work->ordered : false);
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+ if (dst_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(dst_virt, loop_len, work);
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ }
+
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+ remaining_len -= tail_len;
+ while (remaining_len) {
+ if (src_offset == end_src_offset) {
+ src_window = list_entry_next(src_window, list);
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(src_window, &src_win_iter);
+ }
+ if (dst_offset == end_dst_offset) {
+ dst_window = list_entry_next(dst_window, list);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ }
+
+ /* compute dma addresses for transfer */
+ src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+ &src_contig_bytes,
+ &src_win_iter);
+ dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+ &dst_contig_bytes,
+ &dst_win_iter);
+ loop_len = min(src_contig_bytes, dst_contig_bytes);
+ loop_len = min(loop_len, remaining_len);
+ if (work->ordered && !tail_len &&
+ !(remaining_len - loop_len)) {
+ /*
+ * Break up the last chunk of the transfer into two
+ * steps. if there is no tail to gurantee DMA ordering.
+ * Passing SCIF_DMA_POLLING inserts a status update
+ * descriptor in step 1 which acts as a double sided
+ * synchronization fence for the DMA engine to ensure
+ * that the last cache line in step 2 is updated last.
+ */
+ /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len -
+ L1_CACHE_BYTES,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ src_offset += (loop_len - L1_CACHE_BYTES);
+ dst_offset += (loop_len - L1_CACHE_BYTES);
+ src_dma_addr += (loop_len - L1_CACHE_BYTES);
+ dst_dma_addr += (loop_len - L1_CACHE_BYTES);
+ remaining_len -= (loop_len - L1_CACHE_BYTES);
+ loop_len = remaining_len;
+
+ /* Step 2) DMA: L1_CACHE_BYTES */
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ } else {
+ tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+ src_dma_addr,
+ loop_len, 0);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dma_async_issue_pending(chan);
+ }
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ }
+ remaining_len = tail_len;
+ if (remaining_len) {
+ loop_len = remaining_len;
+ if (src_offset == end_src_offset)
+ src_window = list_entry_next(src_window, list);
+ if (dst_offset == end_dst_offset)
+ dst_window = list_entry_next(dst_window, list);
+
+ src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+ dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+ /*
+ * The CPU copy for the tail bytes must be initiated only once
+ * previous DMA transfers for this endpoint have completed to
+ * guarantee ordering.
+ */
+ if (work->ordered) {
+ struct scif_dev *rdev = work->remote_dev;
+
+ ret = scif_drain_dma_poll(rdev->sdev, chan);
+ if (ret)
+ return ret;
+ }
+ if (src_window->type == SCIF_WINDOW_SELF)
+ src_virt = _get_local_va(src_offset, src_window,
+ loop_len);
+ else
+ src_virt = ioremap_remote(src_offset, src_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!src_virt)
+ return -ENOMEM;
+
+ if (dst_window->type == SCIF_WINDOW_SELF)
+ dst_virt = _get_local_va(dst_offset, dst_window,
+ loop_len);
+ else
+ dst_virt = ioremap_remote(dst_offset, dst_window,
+ loop_len,
+ work->remote_dev, NULL);
+ if (!dst_virt) {
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+ return -ENOMEM;
+ }
+
+ if (src_window->type == SCIF_WINDOW_SELF)
+ scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+ work->ordered);
+ else
+ scif_unaligned_cpy_fromio(dst_virt, src_virt,
+ loop_len, work->ordered);
+ if (src_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(src_virt, loop_len, work);
+
+ if (dst_window->type != SCIF_WINDOW_SELF)
+ iounmap_remote(dst_virt, loop_len, work);
+ remaining_len -= loop_len;
+ }
+ return ret;
+err:
+ dev_err(scif_info.mdev.this_device,
+ "%s %d Desc Prog Failed ret %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+}
+
+/*
+ * scif_rma_list_cpu_copy:
+ *
+ * Traverse all the windows and perform CPU copy.
+ */
+static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
+{
+ void *src_virt, *dst_virt;
+ size_t loop_len, remaining_len;
+ int src_page_off, dst_page_off;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ struct scif_window *src_window = work->src_window;
+ struct scif_window *dst_window = work->dst_window;
+ s64 end_src_offset, end_dst_offset;
+ int ret = 0;
+ struct scif_window_iter src_win_iter;
+ struct scif_window_iter dst_win_iter;
+
+ remaining_len = work->len;
+
+ scif_init_window_iter(src_window, &src_win_iter);
+ scif_init_window_iter(dst_window, &dst_win_iter);
+ while (remaining_len) {
+ src_page_off = src_offset & ~PAGE_MASK;
+ dst_page_off = dst_offset & ~PAGE_MASK;
+ loop_len = min(PAGE_SIZE -
+ max(src_page_off, dst_page_off),
+ remaining_len);
+
+ if (src_window->type == SCIF_WINDOW_SELF)
+ src_virt = _get_local_va(src_offset, src_window,
+ loop_len);
+ else
+ src_virt = ioremap_remote(src_offset, src_window,
+ loop_len,
+ work->remote_dev,
+ &src_win_iter);
+ if (!src_virt) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (dst_window->type == SCIF_WINDOW_SELF)
+ dst_virt = _get_local_va(dst_offset, dst_window,
+ loop_len);
+ else
+ dst_virt = ioremap_remote(dst_offset, dst_window,
+ loop_len,
+ work->remote_dev,
+ &dst_win_iter);
+ if (!dst_virt) {
+ if (src_window->type == SCIF_WINDOW_PEER)
+ iounmap_remote(src_virt, loop_len, work);
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (work->loopback) {
+ memcpy(dst_virt, src_virt, loop_len);
+ } else {
+ if (src_window->type == SCIF_WINDOW_SELF)
+ memcpy_toio((void __iomem __force *)dst_virt,
+ src_virt, loop_len);
+ else
+ memcpy_fromio(dst_virt,
+ (void __iomem __force *)src_virt,
+ loop_len);
+ }
+ if (src_window->type == SCIF_WINDOW_PEER)
+ iounmap_remote(src_virt, loop_len, work);
+
+ if (dst_window->type == SCIF_WINDOW_PEER)
+ iounmap_remote(dst_virt, loop_len, work);
+
+ src_offset += loop_len;
+ dst_offset += loop_len;
+ remaining_len -= loop_len;
+ if (remaining_len) {
+ end_src_offset = src_window->offset +
+ (src_window->nr_pages << PAGE_SHIFT);
+ end_dst_offset = dst_window->offset +
+ (dst_window->nr_pages << PAGE_SHIFT);
+ if (src_offset == end_src_offset) {
+ src_window = list_entry_next(src_window, list);
+ scif_init_window_iter(src_window,
+ &src_win_iter);
+ }
+ if (dst_offset == end_dst_offset) {
+ dst_window = list_entry_next(dst_window, list);
+ scif_init_window_iter(dst_window,
+ &dst_win_iter);
+ }
+ }
+ }
+error:
+ return ret;
+}
+
+static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
+ struct scif_copy_work *work,
+ struct dma_chan *chan, off_t loffset)
+{
+ int src_cache_off, dst_cache_off;
+ s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+ u8 *temp = NULL;
+ bool src_local = true, dst_local = false;
+ struct scif_dma_comp_cb *comp_cb;
+ dma_addr_t src_dma_addr, dst_dma_addr;
+ int err;
+
+ if (is_dma_copy_aligned(chan->device, 1, 1, 1))
+ return _scif_rma_list_dma_copy_aligned(work, chan);
+
+ src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+ dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
+
+ if (dst_cache_off == src_cache_off)
+ return scif_rma_list_dma_copy_aligned(work, chan);
+
+ if (work->loopback)
+ return scif_rma_list_cpu_copy(work);
+ src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
+ dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
+ src_local = work->src_window->type == SCIF_WINDOW_SELF;
+ dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
+
+ dst_local = dst_local;
+ /* Allocate dma_completion cb */
+ comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
+ if (!comp_cb)
+ goto error;
+
+ work->comp_cb = comp_cb;
+ comp_cb->cb_cookie = comp_cb;
+ comp_cb->dma_completion_func = &scif_rma_completion_cb;
+
+ if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
+ comp_cb->is_cache = false;
+ /* Allocate padding bytes to align to a cache line */
+ temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
+ GFP_KERNEL);
+ if (!temp)
+ goto free_comp_cb;
+ comp_cb->temp_buf_to_free = temp;
+ /* kmalloc(..) does not guarantee cache line alignment */
+ if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
+ temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
+ } else {
+ comp_cb->is_cache = true;
+ temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
+ if (!temp)
+ goto free_comp_cb;
+ comp_cb->temp_buf_to_free = temp;
+ }
+
+ if (src_local) {
+ temp += dst_cache_off;
+ scif_rma_local_cpu_copy(work->src_offset, work->src_window,
+ temp, work->len, true);
+ } else {
+ comp_cb->dst_window = work->dst_window;
+ comp_cb->dst_offset = work->dst_offset;
+ work->src_offset = work->src_offset - src_cache_off;
+ comp_cb->len = work->len;
+ work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
+ comp_cb->header_padding = src_cache_off;
+ }
+ comp_cb->temp_buf = temp;
+
+ err = scif_map_single(&comp_cb->temp_phys, temp,
+ work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
+ if (err)
+ goto free_temp_buf;
+ comp_cb->sdev = work->remote_dev;
+ if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
+ goto free_temp_buf;
+ if (!src_local)
+ work->fence_type = SCIF_DMA_INTR;
+ return 0;
+free_temp_buf:
+ if (comp_cb->is_cache)
+ kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
+ else
+ kfree(comp_cb->temp_buf_to_free);
+free_comp_cb:
+ kfree(comp_cb);
+error:
+ return -ENOMEM;
+}
+
+/**
+ * scif_rma_copy:
+ * @epd: end point descriptor.
+ * @loffset: offset in local registered address space to/from which to copy
+ * @addr: user virtual address to/from which to copy
+ * @len: length of range to copy
+ * @roffset: offset in remote registered address space to/from which to copy
+ * @flags: flags
+ * @dir: LOCAL->REMOTE or vice versa.
+ * @last_chunk: true if this is the last chunk of a larger transfer
+ *
+ * Validate parameters, check if src/dst registered ranges requested for copy
+ * are valid and initiate either CPU or DMA copy.
+ */
+static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
+ size_t len, off_t roffset, int flags,
+ enum scif_rma_dir dir, bool last_chunk)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_rma_req remote_req;
+ struct scif_rma_req req;
+ struct scif_window *local_window = NULL;
+ struct scif_window *remote_window = NULL;
+ struct scif_copy_work copy_work;
+ bool loopback;
+ int err = 0;
+ struct dma_chan *chan;
+ struct scif_mmu_notif *mmn = NULL;
+ bool cache = false;
+ struct device *spdev;
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
+ SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
+ return -EINVAL;
+
+ loopback = scifdev_self(ep->remote_dev) ? true : false;
+ copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
+ SCIF_DMA_POLL : 0;
+ copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
+
+ /* Use CPU for Mgmt node <-> Mgmt node copies */
+ if (loopback && scif_is_mgmt_node()) {
+ flags |= SCIF_RMA_USECPU;
+ copy_work.fence_type = 0x0;
+ }
+
+ cache = scif_is_set_reg_cache(flags);
+
+ remote_req.out_window = &remote_window;
+ remote_req.offset = roffset;
+ remote_req.nr_bytes = len;
+ /*
+ * If transfer is from local to remote then the remote window
+ * must be writeable and vice versa.
+ */
+ remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
+ remote_req.type = SCIF_WINDOW_PARTIAL;
+ remote_req.head = &ep->rma_info.remote_reg_list;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ return err;
+ }
+
+ if (addr && cache) {
+ mutex_lock(&ep->rma_info.mmn_lock);
+ mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
+ if (!mmn)
+ scif_add_mmu_notifier(current->mm, ep);
+ mutex_unlock(&ep->rma_info.mmn_lock);
+ if (IS_ERR(mmn)) {
+ scif_put_peer_dev(spdev);
+ return PTR_ERR(mmn);
+ }
+ cache = cache && !scif_rma_tc_can_cache(ep, len);
+ }
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (addr) {
+ req.out_window = &local_window;
+ req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
+ PAGE_SIZE);
+ req.va_for_temp = addr & PAGE_MASK;
+ req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
+ VM_READ : VM_WRITE | VM_READ);
+ /* Does a valid local window exist? */
+ if (mmn) {
+ spin_lock(&ep->rma_info.tc_lock);
+ req.head = &mmn->tc_reg_list;
+ err = scif_query_tcw(ep, &req);
+ spin_unlock(&ep->rma_info.tc_lock);
+ }
+ if (!mmn || err) {
+ err = scif_register_temp(epd, req.va_for_temp,
+ req.nr_bytes, req.prot,
+ &loffset, &local_window);
+ if (err) {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto error;
+ }
+ if (!cache)
+ goto skip_cache;
+ atomic_inc(&ep->rma_info.tcw_refcount);
+ atomic_add_return(local_window->nr_pages,
+ &ep->rma_info.tcw_total_pages);
+ if (mmn) {
+ spin_lock(&ep->rma_info.tc_lock);
+ scif_insert_tcw(local_window,
+ &mmn->tc_reg_list);
+ spin_unlock(&ep->rma_info.tc_lock);
+ }
+ }
+skip_cache:
+ loffset = local_window->offset +
+ (addr - local_window->va_for_temp);
+ } else {
+ req.out_window = &local_window;
+ req.offset = loffset;
+ /*
+ * If transfer is from local to remote then the self window
+ * must be readable and vice versa.
+ */
+ req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
+ req.nr_bytes = len;
+ req.type = SCIF_WINDOW_PARTIAL;
+ req.head = &ep->rma_info.reg_list;
+ /* Does a valid local window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto error;
+ }
+ }
+
+ /* Does a valid remote window exist? */
+ err = scif_query_window(&remote_req);
+ if (err) {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ goto error;
+ }
+
+ /*
+ * Prepare copy_work for submitting work to the DMA kernel thread
+ * or CPU copy routine.
+ */
+ copy_work.len = len;
+ copy_work.loopback = loopback;
+ copy_work.remote_dev = ep->remote_dev;
+ if (dir == SCIF_LOCAL_TO_REMOTE) {
+ copy_work.src_offset = loffset;
+ copy_work.src_window = local_window;
+ copy_work.dst_offset = roffset;
+ copy_work.dst_window = remote_window;
+ } else {
+ copy_work.src_offset = roffset;
+ copy_work.src_window = remote_window;
+ copy_work.dst_offset = loffset;
+ copy_work.dst_window = local_window;
+ }
+
+ if (flags & SCIF_RMA_USECPU) {
+ scif_rma_list_cpu_copy(&copy_work);
+ } else {
+ chan = ep->rma_info.dma_chan;
+ err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
+ chan, loffset);
+ }
+ if (addr && !cache)
+ atomic_inc(&ep->rma_info.tw_refcount);
+
+ mutex_unlock(&ep->rma_info.rma_lock);
+
+ if (last_chunk) {
+ struct scif_dev *rdev = ep->remote_dev;
+
+ if (copy_work.fence_type == SCIF_DMA_POLL)
+ err = scif_drain_dma_poll(rdev->sdev,
+ ep->rma_info.dma_chan);
+ else if (copy_work.fence_type == SCIF_DMA_INTR)
+ err = scif_drain_dma_intr(rdev->sdev,
+ ep->rma_info.dma_chan);
+ }
+
+ if (addr && !cache)
+ scif_queue_for_cleanup(local_window, &scif_info.rma);
+ scif_put_peer_dev(spdev);
+ return err;
+error:
+ if (err) {
+ if (addr && local_window && !cache)
+ scif_destroy_window(ep, local_window);
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d len 0x%lx\n",
+ __func__, __LINE__, err, len);
+ }
+ scif_put_peer_dev(spdev);
+ return err;
+}
+
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
+ epd, loffset, len, roffset, flags);
+ if (scif_unaligned(loffset, roffset)) {
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, loffset, 0x0,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_REMOTE_TO_LOCAL, false);
+ if (err)
+ goto readfrom_err;
+ loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, loffset, 0x0, len,
+ roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+readfrom_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_readfrom);
+
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
+ epd, loffset, len, roffset, flags);
+ if (scif_unaligned(loffset, roffset)) {
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, loffset, 0x0,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_LOCAL_TO_REMOTE, false);
+ if (err)
+ goto writeto_err;
+ loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, loffset, 0x0, len,
+ roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+writeto_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_writeto);
+
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+ epd, addr, len, roffset, flags);
+ if (scif_unaligned((off_t __force)addr, roffset)) {
+ if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+ flags &= ~SCIF_RMA_USECACHE;
+
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, 0, (u64)addr,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_REMOTE_TO_LOCAL, false);
+ if (err)
+ goto vreadfrom_err;
+ addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, 0, (u64)addr, len,
+ roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
+vreadfrom_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_vreadfrom);
+
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
+ off_t roffset, int flags)
+{
+ int err;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+ epd, addr, len, roffset, flags);
+ if (scif_unaligned((off_t __force)addr, roffset)) {
+ if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+ flags &= ~SCIF_RMA_USECACHE;
+
+ while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
+ err = scif_rma_copy(epd, 0, (u64)addr,
+ SCIF_MAX_UNALIGNED_BUF_SIZE,
+ roffset, flags,
+ SCIF_LOCAL_TO_REMOTE, false);
+ if (err)
+ goto vwriteto_err;
+ addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+ len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
+ }
+ }
+ err = scif_rma_copy(epd, 0, (u64)addr, len,
+ roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
+vwriteto_err:
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_vwriteto);
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
index b4bfbb08a8e3..00e5d6d66e7b 100644
--- a/drivers/misc/mic/scif/scif_epd.c
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -65,14 +65,14 @@ void scif_teardown_ep(void *endpt)
void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
{
if (!eplock_held)
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
spin_lock(&ep->lock);
ep->state = SCIFEP_ZOMBIE;
spin_unlock(&ep->lock);
list_add_tail(&ep->list, &scif_info.zombie);
scif_info.nr_zombies++;
if (!eplock_held)
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
schedule_work(&scif_info.misc_work);
}
@@ -81,16 +81,15 @@ static struct scif_endpt *scif_find_listen_ep(u16 port)
struct scif_endpt *ep = NULL;
struct list_head *pos, *tmpq;
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
list_for_each_safe(pos, tmpq, &scif_info.listen) {
ep = list_entry(pos, struct scif_endpt, list);
if (ep->port.port == port) {
- spin_lock(&ep->lock);
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
return ep;
}
}
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
return NULL;
}
@@ -99,14 +98,17 @@ void scif_cleanup_zombie_epd(void)
struct list_head *pos, *tmpq;
struct scif_endpt *ep;
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
list_for_each_safe(pos, tmpq, &scif_info.zombie) {
ep = list_entry(pos, struct scif_endpt, list);
- list_del(pos);
- scif_info.nr_zombies--;
- kfree(ep);
+ if (scif_rma_ep_can_uninit(ep)) {
+ list_del(pos);
+ scif_info.nr_zombies--;
+ put_iova_domain(&ep->rma_info.iovad);
+ kfree(ep);
+ }
}
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
}
/**
@@ -137,6 +139,8 @@ void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
if (!ep)
/* Send reject due to no listening ports */
goto conreq_sendrej_free;
+ else
+ spin_lock(&ep->lock);
if (ep->backlog <= ep->conreqcnt) {
/* Send reject due to too many pending requests */
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
index 331322a25213..1771d7a9b8d0 100644
--- a/drivers/misc/mic/scif/scif_epd.h
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -96,7 +96,11 @@ struct scif_endpt_qp_info {
* @conn_port: Connection port
* @conn_err: Errors during connection
* @conn_async_state: Async connection
+ * @conn_pend_wq: Used by poll while waiting for incoming connections
* @conn_list: List of async connection requests
+ * @rma_info: Information for triggering SCIF RMA and DMA operations
+ * @mmu_list: link to list of MMU notifier cleanup work
+ * @anon: anonymous file for use in kernel mode scif poll
*/
struct scif_endpt {
enum scif_epd_state state;
@@ -125,7 +129,11 @@ struct scif_endpt {
struct scif_port_id conn_port;
int conn_err;
int conn_async_state;
+ wait_queue_head_t conn_pend_wq;
struct list_head conn_list;
+ struct scif_endpt_rma_info rma_info;
+ struct list_head mmu_list;
+ struct file *anon;
};
static inline int scifdev_alive(struct scif_endpt *ep)
@@ -133,6 +141,43 @@ static inline int scifdev_alive(struct scif_endpt *ep)
return _scifdev_alive(ep->remote_dev);
}
+/*
+ * scif_verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Checks several generic error conditions and returns the
+ * appropriate error.
+ */
+static inline int scif_verify_epd(struct scif_endpt *ep)
+{
+ if (ep->state == SCIFEP_DISCONNECTED)
+ return -ECONNRESET;
+
+ if (ep->state != SCIFEP_CONNECTED)
+ return -ENOTCONN;
+
+ if (!scifdev_alive(ep))
+ return -ENODEV;
+
+ return 0;
+}
+
+static inline int scif_anon_inode_getfile(scif_epd_t epd)
+{
+ epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
+ if (IS_ERR(epd->anon))
+ return PTR_ERR(epd->anon);
+ return 0;
+}
+
+static inline void scif_anon_inode_fput(scif_epd_t epd)
+{
+ if (epd->anon) {
+ fput(epd->anon);
+ epd->anon = NULL;
+ }
+}
+
void scif_cleanup_zombie_epd(void);
void scif_teardown_ep(void *endpt);
void scif_cleanup_ep_qp(struct scif_endpt *ep);
@@ -157,4 +202,9 @@ void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
int __scif_flush(scif_epd_t epd);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
+unsigned int __scif_pollfd(struct file *f, poll_table *wait,
+ struct scif_endpt *ep);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+ int map_flags, scif_pinned_pages_t *pages);
#endif /* SCIF_EPD_H */
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
index eccf7e7135f9..f7e826142a72 100644
--- a/drivers/misc/mic/scif/scif_fd.c
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -34,6 +34,20 @@ static int scif_fdclose(struct inode *inode, struct file *f)
return scif_close(priv);
}
+static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
+{
+ struct scif_endpt *priv = f->private_data;
+
+ return scif_mmap(vma, priv);
+}
+
+static unsigned int scif_fdpoll(struct file *f, poll_table *wait)
+{
+ struct scif_endpt *priv = f->private_data;
+
+ return __scif_pollfd(f, wait, priv);
+}
+
static int scif_fdflush(struct file *f, fl_owner_t id)
{
struct scif_endpt *ep = f->private_data;
@@ -140,12 +154,12 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
* Add to the list of user mode eps where the second half
* of the accept is not yet completed.
*/
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
(*ep)->listenep = priv;
priv->acceptcnt++;
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
return 0;
}
@@ -163,7 +177,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -EFAULT;
/* Remove form the user accept queue */
- spin_lock(&scif_info.eplock);
+ mutex_lock(&scif_info.eplock);
list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
tmpep = list_entry(pos,
struct scif_endpt, miacceptlist);
@@ -175,7 +189,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
}
if (!fep) {
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
return -ENOENT;
}
@@ -190,9 +204,10 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
}
}
- spin_unlock(&scif_info.eplock);
+ mutex_unlock(&scif_info.eplock);
/* Free the resources automatically created from the open. */
+ scif_anon_inode_fput(priv);
scif_teardown_ep(priv);
scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
f->private_data = newep;
@@ -290,6 +305,157 @@ getnodes_err1:
getnodes_err2:
return err;
}
+ case SCIF_REG:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_reg reg;
+ off_t ret;
+
+ if (copy_from_user(&reg, argp, sizeof(reg))) {
+ err = -EFAULT;
+ goto reg_err;
+ }
+ if (reg.flags & SCIF_MAP_KERNEL) {
+ err = -EINVAL;
+ goto reg_err;
+ }
+ ret = scif_register(priv, (void *)reg.addr, reg.len,
+ reg.offset, reg.prot, reg.flags);
+ if (ret < 0) {
+ err = (int)ret;
+ goto reg_err;
+ }
+
+ if (copy_to_user(&((struct scifioctl_reg __user *)argp)
+ ->out_offset, &ret, sizeof(reg.out_offset))) {
+ err = -EFAULT;
+ goto reg_err;
+ }
+ err = 0;
+reg_err:
+ scif_err_debug(err, "scif_register");
+ return err;
+ }
+ case SCIF_UNREG:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_unreg unreg;
+
+ if (copy_from_user(&unreg, argp, sizeof(unreg))) {
+ err = -EFAULT;
+ goto unreg_err;
+ }
+ err = scif_unregister(priv, unreg.offset, unreg.len);
+unreg_err:
+ scif_err_debug(err, "scif_unregister");
+ return err;
+ }
+ case SCIF_READFROM:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(&copy, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto readfrom_err;
+ }
+ err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
+ copy.flags);
+readfrom_err:
+ scif_err_debug(err, "scif_readfrom");
+ return err;
+ }
+ case SCIF_WRITETO:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(&copy, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto writeto_err;
+ }
+ err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
+ copy.flags);
+writeto_err:
+ scif_err_debug(err, "scif_writeto");
+ return err;
+ }
+ case SCIF_VREADFROM:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(&copy, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto vreadfrom_err;
+ }
+ err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
+ copy.roffset, copy.flags);
+vreadfrom_err:
+ scif_err_debug(err, "scif_vreadfrom");
+ return err;
+ }
+ case SCIF_VWRITETO:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_copy copy;
+
+ if (copy_from_user(&copy, argp, sizeof(copy))) {
+ err = -EFAULT;
+ goto vwriteto_err;
+ }
+ err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
+ copy.roffset, copy.flags);
+vwriteto_err:
+ scif_err_debug(err, "scif_vwriteto");
+ return err;
+ }
+ case SCIF_FENCE_MARK:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_fence_mark mark;
+ int tmp_mark = 0;
+
+ if (copy_from_user(&mark, argp, sizeof(mark))) {
+ err = -EFAULT;
+ goto fence_mark_err;
+ }
+ err = scif_fence_mark(priv, mark.flags, &tmp_mark);
+ if (err)
+ goto fence_mark_err;
+ if (copy_to_user((void __user *)mark.mark, &tmp_mark,
+ sizeof(tmp_mark))) {
+ err = -EFAULT;
+ goto fence_mark_err;
+ }
+fence_mark_err:
+ scif_err_debug(err, "scif_fence_mark");
+ return err;
+ }
+ case SCIF_FENCE_WAIT:
+ {
+ struct scif_endpt *priv = f->private_data;
+
+ err = scif_fence_wait(priv, arg);
+ scif_err_debug(err, "scif_fence_wait");
+ return err;
+ }
+ case SCIF_FENCE_SIGNAL:
+ {
+ struct scif_endpt *priv = f->private_data;
+ struct scifioctl_fence_signal signal;
+
+ if (copy_from_user(&signal, argp, sizeof(signal))) {
+ err = -EFAULT;
+ goto fence_signal_err;
+ }
+
+ err = scif_fence_signal(priv, signal.loff, signal.lval,
+ signal.roff, signal.rval, signal.flags);
+fence_signal_err:
+ scif_err_debug(err, "scif_fence_signal");
+ return err;
+ }
}
return -EINVAL;
}
@@ -298,6 +464,8 @@ const struct file_operations scif_fops = {
.open = scif_fdopen,
.release = scif_fdclose,
.unlocked_ioctl = scif_fdioctl,
+ .mmap = scif_fdmmap,
+ .poll = scif_fdpoll,
.flush = scif_fdflush,
.owner = THIS_MODULE,
};
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
new file mode 100644
index 000000000000..7f2c96f57066
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fence.c
@@ -0,0 +1,771 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+
+#include "scif_main.h"
+
+/**
+ * scif_recv_mark: Handle SCIF_MARK request
+ * @msg: Interrupt message
+ *
+ * The peer has requested a mark.
+ */
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ int mark, err;
+
+ err = _scif_fence_mark(ep, &mark);
+ if (err)
+ msg->uop = SCIF_MARK_NACK;
+ else
+ msg->uop = SCIF_MARK_ACK;
+ msg->payload[0] = ep->remote_ep;
+ msg->payload[2] = mark;
+ scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
+ * @msg: Interrupt message
+ *
+ * The peer has responded to a SCIF_MARK message.
+ */
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_fence_info *fence_req =
+ (struct scif_fence_info *)msg->payload[1];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (msg->uop == SCIF_MARK_ACK) {
+ fence_req->state = OP_COMPLETED;
+ fence_req->dma_mark = (int)msg->payload[2];
+ } else {
+ fence_req->state = OP_FAILED;
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+ complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_wait: Handle SCIF_WAIT request
+ * @msg: Interrupt message
+ *
+ * The peer has requested waiting on a fence.
+ */
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_remote_fence_info *fence;
+
+ /*
+ * Allocate structure for remote fence information and
+ * send a NACK if the allocation failed. The peer will
+ * return ENOMEM upon receiving a NACK.
+ */
+ fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence) {
+ msg->payload[0] = ep->remote_ep;
+ msg->uop = SCIF_WAIT_NACK;
+ scif_nodeqp_send(ep->remote_dev, msg);
+ return;
+ }
+
+ /* Prepare the fence request */
+ memcpy(&fence->msg, msg, sizeof(struct scifmsg));
+ INIT_LIST_HEAD(&fence->list);
+
+ /* Insert to the global remote fence request list */
+ mutex_lock(&scif_info.fencelock);
+ atomic_inc(&ep->rma_info.fence_refcount);
+ list_add_tail(&fence->list, &scif_info.fence);
+ mutex_unlock(&scif_info.fencelock);
+
+ schedule_work(&scif_info.misc_work);
+}
+
+/**
+ * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
+ * @msg: Interrupt message
+ *
+ * The peer has responded to a SCIF_WAIT message.
+ */
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_fence_info *fence_req =
+ (struct scif_fence_info *)msg->payload[1];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (msg->uop == SCIF_WAIT_ACK)
+ fence_req->state = OP_COMPLETED;
+ else
+ fence_req->state = OP_FAILED;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ complete(&fence_req->comp);
+}
+
+/**
+ * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
+ * @msg: Interrupt message
+ *
+ * The peer has requested a signal on a local offset.
+ */
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ int err;
+
+ err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+ SCIF_WINDOW_SELF);
+ if (err)
+ msg->uop = SCIF_SIG_NACK;
+ else
+ msg->uop = SCIF_SIG_ACK;
+ msg->payload[0] = ep->remote_ep;
+ scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
+ * @msg: Interrupt message
+ *
+ * The peer has requested a signal on a remote offset.
+ */
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ int err;
+
+ err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
+ SCIF_WINDOW_PEER);
+ if (err)
+ msg->uop = SCIF_SIG_NACK;
+ else
+ msg->uop = SCIF_SIG_ACK;
+ msg->payload[0] = ep->remote_ep;
+ scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
+ * @msg: Interrupt message
+ *
+ * The peer has responded to a signal request.
+ */
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_fence_info *fence_req =
+ (struct scif_fence_info *)msg->payload[3];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (msg->uop == SCIF_SIG_ACK)
+ fence_req->state = OP_COMPLETED;
+ else
+ fence_req->state = OP_FAILED;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ complete(&fence_req->comp);
+}
+
+static inline void *scif_get_local_va(off_t off, struct scif_window *window)
+{
+ struct page **pages = window->pinned_pages->pages;
+ int page_nr = (off - window->offset) >> PAGE_SHIFT;
+ off_t page_off = off & ~PAGE_MASK;
+
+ return page_address(pages[page_nr]) + page_off;
+}
+
+static void scif_prog_signal_cb(void *arg)
+{
+ struct scif_status *status = arg;
+
+ dma_pool_free(status->ep->remote_dev->signal_pool, status,
+ status->src_dma_addr);
+}
+
+static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct dma_chan *chan = ep->rma_info.dma_chan;
+ struct dma_device *ddev = chan->device;
+ bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
+ struct dma_async_tx_descriptor *tx;
+ struct scif_status *status = NULL;
+ dma_addr_t src;
+ dma_cookie_t cookie;
+ int err;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto alloc_fail;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = (int)cookie;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto alloc_fail;
+ }
+ dma_async_issue_pending(chan);
+ if (x100) {
+ /*
+ * For X100 use the status descriptor to write the value to
+ * the destination.
+ */
+ tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
+ } else {
+ status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
+ &src);
+ if (!status) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto alloc_fail;
+ }
+ status->val = val;
+ status->src_dma_addr = src;
+ status->ep = ep;
+ src += offsetof(struct scif_status, val);
+ tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
+ DMA_PREP_INTERRUPT);
+ }
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto dma_fail;
+ }
+ if (!x100) {
+ tx->callback = scif_prog_signal_cb;
+ tx->callback_param = status;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = -EIO;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto dma_fail;
+ }
+ dma_async_issue_pending(chan);
+ return 0;
+dma_fail:
+ if (!x100)
+ dma_pool_free(ep->remote_dev->signal_pool, status,
+ status->src_dma_addr);
+alloc_fail:
+ return err;
+}
+
+/*
+ * scif_prog_signal:
+ * @epd - Endpoint Descriptor
+ * @offset - registered address to write @val to
+ * @val - Value to be written at @offset
+ * @type - Type of the window.
+ *
+ * Arrange to write a value to the registered offset after ensuring that the
+ * offset provided is indeed valid.
+ */
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+ enum scif_window_type type)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_window *window = NULL;
+ struct scif_rma_req req;
+ dma_addr_t dst_dma_addr;
+ int err;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ req.out_window = &window;
+ req.offset = offset;
+ req.nr_bytes = sizeof(u64);
+ req.prot = SCIF_PROT_WRITE;
+ req.type = SCIF_WINDOW_SINGLE;
+ if (type == SCIF_WINDOW_SELF)
+ req.head = &ep->rma_info.reg_list;
+ else
+ req.head = &ep->rma_info.remote_reg_list;
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto unlock_ret;
+ }
+
+ if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
+ u64 *dst_virt;
+
+ if (type == SCIF_WINDOW_SELF)
+ dst_virt = scif_get_local_va(offset, window);
+ else
+ dst_virt =
+ scif_get_local_va(offset, (struct scif_window *)
+ window->peer_window);
+ *dst_virt = val;
+ } else {
+ dst_dma_addr = __scif_off_to_dma_addr(window, offset);
+ err = _scif_prog_signal(epd, dst_dma_addr, val);
+ }
+unlock_ret:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ return err;
+}
+
+static int _scif_fence_wait(scif_epd_t epd, int mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
+ int err;
+
+ /* Wait for DMA callback in scif_fence_mark_cb(..) */
+ err = wait_event_interruptible_timeout(ep->rma_info.markwq,
+ dma_async_is_tx_complete(
+ ep->rma_info.dma_chan,
+ cookie, NULL, NULL) ==
+ DMA_COMPLETE,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err)
+ err = -ETIMEDOUT;
+ else if (err > 0)
+ err = 0;
+ return err;
+}
+
+/**
+ * scif_rma_handle_remote_fences:
+ *
+ * This routine services remote fence requests.
+ */
+void scif_rma_handle_remote_fences(void)
+{
+ struct list_head *item, *tmp;
+ struct scif_remote_fence_info *fence;
+ struct scif_endpt *ep;
+ int mark, err;
+
+ might_sleep();
+ mutex_lock(&scif_info.fencelock);
+ list_for_each_safe(item, tmp, &scif_info.fence) {
+ fence = list_entry(item, struct scif_remote_fence_info,
+ list);
+ /* Remove fence from global list */
+ list_del(&fence->list);
+
+ /* Initiate the fence operation */
+ ep = (struct scif_endpt *)fence->msg.payload[0];
+ mark = fence->msg.payload[2];
+ err = _scif_fence_wait(ep, mark);
+ if (err)
+ fence->msg.uop = SCIF_WAIT_NACK;
+ else
+ fence->msg.uop = SCIF_WAIT_ACK;
+ fence->msg.payload[0] = ep->remote_ep;
+ scif_nodeqp_send(ep->remote_dev, &fence->msg);
+ kfree(fence);
+ if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
+ schedule_work(&scif_info.misc_work);
+ }
+ mutex_unlock(&scif_info.fencelock);
+}
+
+static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
+{
+ int err;
+ struct scifmsg msg;
+ struct scif_fence_info *fence_req;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+ fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+ if (!fence_req) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ fence_req->state = OP_IN_PROGRESS;
+ init_completion(&fence_req->comp);
+
+ msg.src = ep->port;
+ msg.uop = uop;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = (u64)fence_req;
+ if (uop == SCIF_WAIT)
+ msg.payload[2] = mark;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ if (err)
+ goto error_free;
+retry:
+ /* Wait for a SCIF_WAIT_(N)ACK message */
+ err = wait_for_completion_timeout(&fence_req->comp,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ if (!err)
+ err = -ENODEV;
+ if (err > 0)
+ err = 0;
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (err < 0) {
+ if (fence_req->state == OP_IN_PROGRESS)
+ fence_req->state = OP_FAILED;
+ }
+ if (fence_req->state == OP_FAILED && !err)
+ err = -ENOMEM;
+ if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
+ *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
+ mutex_unlock(&ep->rma_info.rma_lock);
+error_free:
+ kfree(fence_req);
+error:
+ return err;
+}
+
+/**
+ * scif_send_fence_mark:
+ * @epd: end point descriptor.
+ * @out_mark: Output DMA mark reported by peer.
+ *
+ * Send a remote fence mark request.
+ */
+static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
+{
+ return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
+}
+
+/**
+ * scif_send_fence_wait:
+ * @epd: end point descriptor.
+ * @mark: DMA mark to wait for.
+ *
+ * Send a remote fence wait request.
+ */
+static int scif_send_fence_wait(scif_epd_t epd, int mark)
+{
+ return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
+}
+
+static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
+ struct scif_fence_info *fence_req)
+{
+ int err;
+
+retry:
+ /* Wait for a SCIF_SIG_(N)ACK message */
+ err = wait_for_completion_timeout(&fence_req->comp,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ if (!err)
+ err = -ENODEV;
+ if (err > 0)
+ err = 0;
+ if (err < 0) {
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (fence_req->state == OP_IN_PROGRESS)
+ fence_req->state = OP_FAILED;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ }
+ if (fence_req->state == OP_FAILED && !err)
+ err = -ENXIO;
+ return err;
+}
+
+/**
+ * scif_send_fence_signal:
+ * @epd - endpoint descriptor
+ * @loff - local offset
+ * @lval - local value to write to loffset
+ * @roff - remote offset
+ * @rval - remote value to write to roffset
+ * @flags - flags
+ *
+ * Sends a remote fence signal request
+ */
+static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
+ off_t loff, u64 lval, int flags)
+{
+ int err = 0;
+ struct scifmsg msg;
+ struct scif_fence_info *fence_req;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+ fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+ if (!fence_req) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ fence_req->state = OP_IN_PROGRESS;
+ init_completion(&fence_req->comp);
+ msg.src = ep->port;
+ if (flags & SCIF_SIGNAL_LOCAL) {
+ msg.uop = SCIF_SIG_LOCAL;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = roff;
+ msg.payload[2] = rval;
+ msg.payload[3] = (u64)fence_req;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ if (err)
+ goto error_free;
+ err = _scif_send_fence_signal_wait(ep, fence_req);
+ if (err)
+ goto error_free;
+ }
+ fence_req->state = OP_IN_PROGRESS;
+
+ if (flags & SCIF_SIGNAL_REMOTE) {
+ msg.uop = SCIF_SIG_REMOTE;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = loff;
+ msg.payload[2] = lval;
+ msg.payload[3] = (u64)fence_req;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ if (err)
+ goto error_free;
+ err = _scif_send_fence_signal_wait(ep, fence_req);
+ }
+error_free:
+ kfree(fence_req);
+error:
+ return err;
+}
+
+static void scif_fence_mark_cb(void *arg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)arg;
+
+ wake_up_interruptible(&ep->rma_info.markwq);
+ atomic_dec(&ep->rma_info.fence_refcount);
+}
+
+/*
+ * _scif_fence_mark:
+ *
+ * @epd - endpoint descriptor
+ * Set up a mark for this endpoint and return the value of the mark.
+ */
+int _scif_fence_mark(scif_epd_t epd, int *mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct dma_chan *chan = ep->rma_info.dma_chan;
+ struct dma_device *ddev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_cookie_t cookie;
+ int err;
+
+ tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = (int)cookie;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ dma_async_issue_pending(chan);
+ tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ err = -ENOMEM;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ tx->callback = scif_fence_mark_cb;
+ tx->callback_param = ep;
+ *mark = cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = (int)cookie;
+ dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+ }
+ atomic_inc(&ep->rma_info.fence_refcount);
+ dma_async_issue_pending(chan);
+ return 0;
+}
+
+#define SCIF_LOOPB_MAGIC_MARK 0xdead
+
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
+ ep, flags, *mark);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ /* Invalid flags? */
+ if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
+ return -EINVAL;
+
+ /* At least one of init self or peer RMA should be set */
+ if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+ return -EINVAL;
+
+ /* Exactly one of init self or peer RMA should be set but not both */
+ if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+ return -EINVAL;
+
+ /*
+ * Management node loopback does not need to use DMA.
+ * Return a valid mark to be symmetric.
+ */
+ if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+ *mark = SCIF_LOOPB_MAGIC_MARK;
+ return 0;
+ }
+
+ if (flags & SCIF_FENCE_INIT_SELF)
+ err = _scif_fence_mark(epd, mark);
+ else
+ err = scif_send_fence_mark(ep, mark);
+
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
+ ep, flags, *mark, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_mark);
+
+int scif_fence_wait(scif_epd_t epd, int mark)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_wait: ep %p mark 0x%x\n",
+ ep, mark);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+ /*
+ * Management node loopback does not need to use DMA.
+ * The only valid mark provided is 0 so simply
+ * return success if the mark is valid.
+ */
+ if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+ if (mark == SCIF_LOOPB_MAGIC_MARK)
+ return 0;
+ else
+ return -EINVAL;
+ }
+ if (mark & SCIF_REMOTE_FENCE)
+ err = scif_send_fence_wait(epd, mark);
+ else
+ err = _scif_fence_wait(epd, mark);
+ if (err < 0)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_wait);
+
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
+ off_t roff, u64 rval, int flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ int err = 0;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
+ ep, loff, lval, roff, rval, flags);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ /* Invalid flags? */
+ if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
+ SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
+ return -EINVAL;
+
+ /* At least one of init self or peer RMA should be set */
+ if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+ return -EINVAL;
+
+ /* Exactly one of init self or peer RMA should be set but not both */
+ if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+ return -EINVAL;
+
+ /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
+ if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
+ return -EINVAL;
+
+ /* Only Dword offsets allowed */
+ if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
+ return -EINVAL;
+
+ /* Only Dword aligned offsets allowed */
+ if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
+ return -EINVAL;
+
+ if (flags & SCIF_FENCE_INIT_PEER) {
+ err = scif_send_fence_signal(epd, roff, rval, loff,
+ lval, flags);
+ } else {
+ /* Local Signal in Local RAS */
+ if (flags & SCIF_SIGNAL_LOCAL) {
+ err = scif_prog_signal(epd, loff, lval,
+ SCIF_WINDOW_SELF);
+ if (err)
+ goto error_ret;
+ }
+
+ /* Signal in Remote RAS */
+ if (flags & SCIF_SIGNAL_REMOTE)
+ err = scif_prog_signal(epd, roff,
+ rval, SCIF_WINDOW_PEER);
+ }
+error_ret:
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_signal);
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
index 6ce851f5c7e6..36d847af1209 100644
--- a/drivers/misc/mic/scif/scif_main.c
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -34,6 +34,7 @@ struct scif_info scif_info = {
};
struct scif_dev *scif_dev;
+struct kmem_cache *unaligned_cache;
static atomic_t g_loopb_cnt;
/* Runs in the context of intr_wq */
@@ -80,35 +81,6 @@ irqreturn_t scif_intr_handler(int irq, void *data)
return IRQ_HANDLED;
}
-static int scif_peer_probe(struct scif_peer_dev *spdev)
-{
- struct scif_dev *scifdev = &scif_dev[spdev->dnode];
-
- mutex_lock(&scif_info.conflock);
- scif_info.total++;
- scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
- mutex_unlock(&scif_info.conflock);
- rcu_assign_pointer(scifdev->spdev, spdev);
-
- /* In the future SCIF kernel client devices will be added here */
- return 0;
-}
-
-static void scif_peer_remove(struct scif_peer_dev *spdev)
-{
- struct scif_dev *scifdev = &scif_dev[spdev->dnode];
-
- /* In the future SCIF kernel client devices will be removed here */
- spdev = rcu_dereference(scifdev->spdev);
- if (spdev)
- RCU_INIT_POINTER(scifdev->spdev, NULL);
- synchronize_rcu();
-
- mutex_lock(&scif_info.conflock);
- scif_info.total--;
- mutex_unlock(&scif_info.conflock);
-}
-
static void scif_qp_setup_handler(struct work_struct *work)
{
struct scif_dev *scifdev = container_of(work, struct scif_dev,
@@ -139,20 +111,13 @@ static void scif_qp_setup_handler(struct work_struct *work)
}
}
-static int scif_setup_scifdev(struct scif_hw_dev *sdev)
+static int scif_setup_scifdev(void)
{
+ /* We support a maximum of 129 SCIF nodes including the mgmt node */
+#define MAX_SCIF_NODES 129
int i;
- u8 num_nodes;
-
- if (sdev->snode) {
- struct mic_bootparam __iomem *bp = sdev->rdp;
-
- num_nodes = ioread8(&bp->tot_nodes);
- } else {
- struct mic_bootparam *bp = sdev->dp;
+ u8 num_nodes = MAX_SCIF_NODES;
- num_nodes = bp->tot_nodes;
- }
scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
if (!scif_dev)
return -ENOMEM;
@@ -163,7 +128,7 @@ static int scif_setup_scifdev(struct scif_hw_dev *sdev)
scifdev->exit = OP_IDLE;
init_waitqueue_head(&scifdev->disconn_wq);
mutex_init(&scifdev->lock);
- INIT_WORK(&scifdev->init_msg_work, scif_qp_response_ack);
+ INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
INIT_DELAYED_WORK(&scifdev->p2p_dwork,
scif_poll_qp_state);
INIT_DELAYED_WORK(&scifdev->qp_dwork,
@@ -181,27 +146,21 @@ static void scif_destroy_scifdev(void)
static int scif_probe(struct scif_hw_dev *sdev)
{
- struct scif_dev *scifdev;
+ struct scif_dev *scifdev = &scif_dev[sdev->dnode];
int rc;
dev_set_drvdata(&sdev->dev, sdev);
+ scifdev->sdev = sdev;
+
if (1 == atomic_add_return(1, &g_loopb_cnt)) {
- struct scif_dev *loopb_dev;
+ struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
- rc = scif_setup_scifdev(sdev);
- if (rc)
- goto exit;
- scifdev = &scif_dev[sdev->dnode];
- scifdev->sdev = sdev;
- loopb_dev = &scif_dev[sdev->snode];
loopb_dev->sdev = sdev;
rc = scif_setup_loopback_qp(loopb_dev);
if (rc)
- goto free_sdev;
- } else {
- scifdev = &scif_dev[sdev->dnode];
- scifdev->sdev = sdev;
+ goto exit;
}
+
rc = scif_setup_intr_wq(scifdev);
if (rc)
goto destroy_loopb;
@@ -237,8 +196,6 @@ destroy_intr:
destroy_loopb:
if (atomic_dec_and_test(&g_loopb_cnt))
scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
-free_sdev:
- scif_destroy_scifdev();
exit:
return rc;
}
@@ -290,13 +247,6 @@ static void scif_remove(struct scif_hw_dev *sdev)
scifdev->sdev = NULL;
}
-static struct scif_peer_driver scif_peer_driver = {
- .driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
- .probe = scif_peer_probe,
- .remove = scif_peer_remove,
-};
-
static struct scif_hw_dev_id id_table[] = {
{ MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
{ 0 },
@@ -312,29 +262,54 @@ static struct scif_driver scif_driver = {
static int _scif_init(void)
{
- spin_lock_init(&scif_info.eplock);
+ int rc;
+
+ mutex_init(&scif_info.eplock);
+ spin_lock_init(&scif_info.rmalock);
spin_lock_init(&scif_info.nb_connect_lock);
spin_lock_init(&scif_info.port_lock);
mutex_init(&scif_info.conflock);
mutex_init(&scif_info.connlock);
+ mutex_init(&scif_info.fencelock);
INIT_LIST_HEAD(&scif_info.uaccept);
INIT_LIST_HEAD(&scif_info.listen);
INIT_LIST_HEAD(&scif_info.zombie);
INIT_LIST_HEAD(&scif_info.connected);
INIT_LIST_HEAD(&scif_info.disconnected);
+ INIT_LIST_HEAD(&scif_info.rma);
+ INIT_LIST_HEAD(&scif_info.rma_tc);
+ INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
+ INIT_LIST_HEAD(&scif_info.fence);
INIT_LIST_HEAD(&scif_info.nb_connect_list);
init_waitqueue_head(&scif_info.exitwq);
+ scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
scif_info.en_msg_log = 0;
scif_info.p2p_enable = 1;
+ rc = scif_setup_scifdev();
+ if (rc)
+ goto error;
+ unaligned_cache = kmem_cache_create("Unaligned_DMA",
+ SCIF_KMEM_UNALIGNED_BUF_SIZE,
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!unaligned_cache) {
+ rc = -ENOMEM;
+ goto free_sdev;
+ }
INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+ INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
INIT_WORK(&scif_info.conn_work, scif_conn_handler);
idr_init(&scif_ports);
return 0;
+free_sdev:
+ scif_destroy_scifdev();
+error:
+ return rc;
}
static void _scif_exit(void)
{
idr_destroy(&scif_ports);
+ kmem_cache_destroy(unaligned_cache);
scif_destroy_scifdev();
}
@@ -344,15 +319,13 @@ static int __init scif_init(void)
int rc;
_scif_init();
+ iova_cache_get();
rc = scif_peer_bus_init();
if (rc)
goto exit;
- rc = scif_peer_register_driver(&scif_peer_driver);
- if (rc)
- goto peer_bus_exit;
rc = scif_register_driver(&scif_driver);
if (rc)
- goto unreg_scif_peer;
+ goto peer_bus_exit;
rc = misc_register(mdev);
if (rc)
goto unreg_scif;
@@ -360,8 +333,6 @@ static int __init scif_init(void)
return 0;
unreg_scif:
scif_unregister_driver(&scif_driver);
-unreg_scif_peer:
- scif_peer_unregister_driver(&scif_peer_driver);
peer_bus_exit:
scif_peer_bus_exit();
exit:
@@ -374,8 +345,8 @@ static void __exit scif_exit(void)
scif_exit_debugfs();
misc_deregister(&scif_info.mdev);
scif_unregister_driver(&scif_driver);
- scif_peer_unregister_driver(&scif_peer_driver);
scif_peer_bus_exit();
+ iova_cache_put();
_scif_exit();
}
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
index 580bc63e1b23..a08f0b600a9e 100644
--- a/drivers/misc/mic/scif/scif_main.h
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -22,15 +22,18 @@
#include <linux/pci.h>
#include <linux/miscdevice.h>
#include <linux/dmaengine.h>
+#include <linux/iova.h>
+#include <linux/anon_inodes.h>
#include <linux/file.h>
+#include <linux/vmalloc.h>
#include <linux/scif.h>
-
#include "../common/mic_dev.h"
#define SCIF_MGMT_NODE 0
#define SCIF_DEFAULT_WATCHDOG_TO 30
#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
/*
* Generic state used for certain node QP message exchanges
@@ -73,13 +76,21 @@ enum scif_msg_state {
* @loopb_work: Used for submitting work to loopb_wq
* @loopb_recv_q: List of messages received on the loopb_wq
* @card_initiated_exit: set when the card has initiated the exit
+ * @rmalock: Synchronize access to RMA operations
+ * @fencelock: Synchronize access to list of remote fences requested.
+ * @rma: List of temporary registered windows to be destroyed.
+ * @rma_tc: List of temporary registered & cached Windows to be destroyed
+ * @fence: List of remote fence requests
+ * @mmu_notif_work: Work for registration caching MMU notifier workqueue
+ * @mmu_notif_cleanup: List of temporary cached windows for reg cache
+ * @rma_tc_limit: RMA temporary cache limit
*/
struct scif_info {
u8 nodeid;
u8 maxid;
u8 total;
u32 nr_zombies;
- spinlock_t eplock;
+ struct mutex eplock;
struct mutex connlock;
spinlock_t nb_connect_lock;
spinlock_t port_lock;
@@ -102,6 +113,14 @@ struct scif_info {
struct work_struct loopb_work;
struct list_head loopb_recv_q;
bool card_initiated_exit;
+ spinlock_t rmalock;
+ struct mutex fencelock;
+ struct list_head rma;
+ struct list_head rma_tc;
+ struct list_head fence;
+ struct work_struct mmu_notif_work;
+ struct list_head mmu_notif_cleanup;
+ unsigned long rma_tc_limit;
};
/*
@@ -139,7 +158,7 @@ struct scif_p2p_info {
* @db: doorbell the peer will trigger to generate an interrupt on self
* @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
* @cookie: Cookie received while registering the interrupt handler
- * init_msg_work: work scheduled for SCIF_INIT message processing
+ * @peer_add_work: Work for handling device_add for peer devices
* @p2p_dwork: Delayed work to enable polling for P2P state
* @qp_dwork: Delayed work for enabling polling for remote QP information
* @p2p_retry: Number of times to retry polling of P2P state
@@ -152,6 +171,8 @@ struct scif_p2p_info {
* @disconn_rescnt: Keeps track of number of node remove requests sent
* @exit: Status of exit message
* @qp_dma_addr: Queue pair DMA address passed to the peer
+ * @dma_ch_idx: Round robin index for DMA channels
+ * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
*/
struct scif_dev {
u8 node;
@@ -165,7 +186,7 @@ struct scif_dev {
int db;
int rdb;
struct mic_irq *cookie;
- struct work_struct init_msg_work;
+ struct work_struct peer_add_work;
struct delayed_work p2p_dwork;
struct delayed_work qp_dwork;
int p2p_retry;
@@ -178,17 +199,25 @@ struct scif_dev {
atomic_t disconn_rescnt;
enum scif_msg_state exit;
dma_addr_t qp_dma_addr;
+ int dma_ch_idx;
+ struct dma_pool *signal_pool;
};
+extern bool scif_reg_cache_enable;
+extern bool scif_ulimit_check;
extern struct scif_info scif_info;
extern struct idr scif_ports;
+extern struct bus_type scif_peer_bus;
extern struct scif_dev *scif_dev;
extern const struct file_operations scif_fops;
+extern const struct file_operations scif_anon_fops;
/* Size of the RB for the Node QP */
#define SCIF_NODE_QP_SIZE 0x10000
#include "scif_nodeqp.h"
+#include "scif_rma.h"
+#include "scif_rma_list.h"
/*
* scifdev_self:
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
index 20e50b4e19b2..3e86360ba5a6 100644
--- a/drivers/misc/mic/scif/scif_map.h
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -80,7 +80,7 @@ scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
size_t size)
{
if (!scifdev_self(scifdev)) {
- if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+ if (scifdev_is_p2p(scifdev))
local = local - scifdev->base_addr;
dma_unmap_single(&scifdev->sdev->dev, local,
size, DMA_BIDIRECTIONAL);
@@ -110,4 +110,27 @@ scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
}
}
+
+static __always_inline int
+scif_map_page(dma_addr_t *dma_handle, struct page *page,
+ struct scif_dev *scifdev)
+{
+ int err = 0;
+
+ if (scifdev_self(scifdev)) {
+ *dma_handle = page_to_phys(page);
+ } else {
+ struct scif_hw_dev *sdev = scifdev->sdev;
+ *dma_handle = dma_map_page(&sdev->dev,
+ page, 0x0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&sdev->dev, *dma_handle))
+ err = -ENOMEM;
+ else if (scifdev_is_p2p(scifdev))
+ *dma_handle = *dma_handle + scifdev->base_addr;
+ }
+ if (err)
+ *dma_handle = 0;
+ return err;
+}
#endif /* SCIF_MAP_H */
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
new file mode 100644
index 000000000000..49cb8f7b4672
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -0,0 +1,699 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+/*
+ * struct scif_vma_info - Information about a remote memory mapping
+ * created via scif_mmap(..)
+ * @vma: VM area struct
+ * @list: link to list of active vmas
+ */
+struct scif_vma_info {
+ struct vm_area_struct *vma;
+ struct list_head list;
+};
+
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ struct scif_window *recv_window =
+ (struct scif_window *)msg->payload[0];
+ struct scif_endpt *ep;
+
+ ep = (struct scif_endpt *)recv_window->ep;
+ req.out_window = &window;
+ req.offset = recv_window->offset;
+ req.prot = recv_window->prot;
+ req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+ req.type = SCIF_WINDOW_FULL;
+ req.head = &ep->rma_info.reg_list;
+ msg->payload[0] = ep->remote_ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ if (scif_query_window(&req)) {
+ dev_err(&scifdev->sdev->dev,
+ "%s %d -ENXIO\n", __func__, __LINE__);
+ msg->uop = SCIF_UNREGISTER_ACK;
+ goto error;
+ }
+
+ scif_put_window(window, window->nr_pages);
+
+ if (!window->ref_count) {
+ atomic_inc(&ep->rma_info.tw_refcount);
+ ep->rma_info.async_list_del = 1;
+ list_del_init(&window->list);
+ scif_free_window_offset(ep, window, window->offset);
+ }
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (window && !window->ref_count)
+ scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/*
+ * Remove valid remote memory mappings created via scif_mmap(..) from the
+ * process address space since the remote node is lost
+ */
+static void __scif_zap_mmaps(struct scif_endpt *ep)
+{
+ struct list_head *item;
+ struct scif_vma_info *info;
+ struct vm_area_struct *vma;
+ unsigned long size;
+
+ spin_lock(&ep->lock);
+ list_for_each(item, &ep->rma_info.vma_list) {
+ info = list_entry(item, struct scif_vma_info, list);
+ vma = info->vma;
+ size = vma->vm_end - vma->vm_start;
+ zap_vma_ptes(vma, vma->vm_start, size);
+ dev_dbg(scif_info.mdev.this_device,
+ "%s ep %p zap vma %p size 0x%lx\n",
+ __func__, ep, info->vma, size);
+ }
+ spin_unlock(&ep->lock);
+}
+
+/*
+ * Traverse the list of endpoints for a particular remote node and
+ * zap valid remote memory mappings since the remote node is lost
+ */
+static void _scif_zap_mmaps(int node, struct list_head *head)
+{
+ struct scif_endpt *ep;
+ struct list_head *item;
+
+ mutex_lock(&scif_info.connlock);
+ list_for_each(item, head) {
+ ep = list_entry(item, struct scif_endpt, list);
+ if (ep->remote_dev->node == node)
+ __scif_zap_mmaps(ep);
+ }
+ mutex_unlock(&scif_info.connlock);
+}
+
+/*
+ * Wrapper for removing remote memory mappings for a particular node. This API
+ * is called by peer nodes as part of handling a lost node.
+ */
+void scif_zap_mmaps(int node)
+{
+ _scif_zap_mmaps(node, &scif_info.connected);
+ _scif_zap_mmaps(node, &scif_info.disconnected);
+}
+
+/*
+ * This API is only called while handling a lost node:
+ * a) Remote node is dead.
+ * b) Remote memory mappings have been zapped
+ * So we can traverse the remote_reg_list without any locks. Since
+ * the window has not yet been unregistered we can drop the ref count
+ * and queue it to the cleanup thread.
+ */
+static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
+{
+ struct list_head *pos, *tmp;
+ struct scif_window *window;
+
+ list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
+ window = list_entry(pos, struct scif_window, list);
+ if (window->ref_count)
+ scif_put_window(window, window->nr_pages);
+ else
+ dev_err(scif_info.mdev.this_device,
+ "%s %d unexpected\n",
+ __func__, __LINE__);
+ if (!window->ref_count) {
+ atomic_inc(&ep->rma_info.tw_refcount);
+ list_del_init(&window->list);
+ scif_queue_for_cleanup(window, &scif_info.rma);
+ }
+ }
+}
+
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node)
+{
+ struct scif_endpt *ep;
+ struct list_head *item;
+
+ mutex_lock(&scif_info.eplock);
+ list_for_each(item, &scif_info.zombie) {
+ ep = list_entry(item, struct scif_endpt, list);
+ if (ep->remote_dev && ep->remote_dev->node == node)
+ __scif_cleanup_rma_for_zombies(ep);
+ }
+ mutex_unlock(&scif_info.eplock);
+ flush_work(&scif_info.misc_work);
+}
+
+/* Insert the VMA into the per endpoint VMA list */
+static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+ struct scif_vma_info *info;
+ int err = 0;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ err = -ENOMEM;
+ goto done;
+ }
+ info->vma = vma;
+ spin_lock(&ep->lock);
+ list_add_tail(&info->list, &ep->rma_info.vma_list);
+ spin_unlock(&ep->lock);
+done:
+ return err;
+}
+
+/* Delete the VMA from the per endpoint VMA list */
+static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+ struct list_head *item;
+ struct scif_vma_info *info;
+
+ spin_lock(&ep->lock);
+ list_for_each(item, &ep->rma_info.vma_list) {
+ info = list_entry(item, struct scif_vma_info, list);
+ if (info->vma == vma) {
+ list_del(&info->list);
+ kfree(info);
+ break;
+ }
+ }
+ spin_unlock(&ep->lock);
+}
+
+static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
+{
+ struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
+ struct scif_hw_dev *sdev = scifdev->sdev;
+ phys_addr_t out_phys, apt_base = 0;
+
+ /*
+ * If the DMA address is card relative then we need to add the
+ * aperture base for mmap to work correctly
+ */
+ if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
+ apt_base = sdev->aper->pa;
+ out_phys = apt_base + phys;
+ return out_phys;
+}
+
+int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
+ struct scif_range **pages)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ int nr_pages, err, i;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
+ ep, offset, len);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ if (!len || (offset < 0) ||
+ (offset + len < offset) ||
+ (ALIGN(offset, PAGE_SIZE) != offset) ||
+ (ALIGN(len, PAGE_SIZE) != len))
+ return -EINVAL;
+
+ nr_pages = len >> PAGE_SHIFT;
+
+ req.out_window = &window;
+ req.offset = offset;
+ req.prot = 0;
+ req.nr_bytes = len;
+ req.type = SCIF_WINDOW_SINGLE;
+ req.head = &ep->rma_info.remote_reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error;
+ }
+
+ /* Allocate scif_range */
+ *pages = kzalloc(sizeof(**pages), GFP_KERNEL);
+ if (!*pages) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ /* Allocate phys addr array */
+ (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
+ if (!((*pages)->phys_addr)) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
+ /* Allocate virtual address array */
+ ((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)));
+ if (!(*pages)->va) {
+ err = -ENOMEM;
+ goto error;
+ }
+ }
+ /* Populate the values */
+ (*pages)->cookie = window;
+ (*pages)->nr_pages = nr_pages;
+ (*pages)->prot_flags = window->prot;
+
+ for (i = 0; i < nr_pages; i++) {
+ (*pages)->phys_addr[i] =
+ __scif_off_to_dma_addr(window, offset +
+ (i * PAGE_SIZE));
+ (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
+ ep);
+ if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
+ (*pages)->va[i] =
+ ep->remote_dev->sdev->aper->va +
+ (*pages)->phys_addr[i] -
+ ep->remote_dev->sdev->aper->pa;
+ }
+
+ scif_get_window(window, nr_pages);
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (err) {
+ if (*pages) {
+ scif_free((*pages)->phys_addr,
+ nr_pages * sizeof(dma_addr_t));
+ scif_free((*pages)->va,
+ nr_pages * sizeof(void *));
+ kfree(*pages);
+ *pages = NULL;
+ }
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_get_pages);
+
+int scif_put_pages(struct scif_range *pages)
+{
+ struct scif_endpt *ep;
+ struct scif_window *window;
+ struct scifmsg msg;
+
+ if (!pages || !pages->cookie)
+ return -EINVAL;
+
+ window = pages->cookie;
+
+ if (!window || window->magic != SCIFEP_MAGIC)
+ return -EINVAL;
+
+ ep = (struct scif_endpt *)window->ep;
+ /*
+ * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
+ * callee should be allowed to release references to the pages,
+ * else the endpoint was not connected in the first place,
+ * hence the ENOTCONN.
+ */
+ if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
+ return -ENOTCONN;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+
+ scif_put_window(window, pages->nr_pages);
+
+ /* Initiate window destruction if ref count is zero */
+ if (!window->ref_count) {
+ list_del(&window->list);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan);
+ /* Inform the peer about this window being destroyed. */
+ msg.uop = SCIF_MUNMAP;
+ msg.src = ep->port;
+ msg.payload[0] = window->peer_window;
+ /* No error handling for notification messages */
+ scif_nodeqp_send(ep->remote_dev, &msg);
+ /* Destroy this window from the peer's registered AS */
+ scif_destroy_remote_window(window);
+ } else {
+ mutex_unlock(&ep->rma_info.rma_lock);
+ }
+
+ scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
+ scif_free(pages->va, pages->nr_pages * sizeof(void *));
+ kfree(pages);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(scif_put_pages);
+
+/*
+ * scif_rma_list_mmap:
+ *
+ * Traverse the remote registration list starting from start_window:
+ * 1) Create VtoP mappings via remap_pfn_range(..)
+ * 2) Once step 1) and 2) complete successfully then traverse the range of
+ * windows again and bump the reference count.
+ * RMA lock must be held.
+ */
+static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
+ int nr_pages, struct vm_area_struct *vma)
+{
+ s64 end_offset, loop_offset = offset;
+ struct scif_window *window = start_window;
+ int loop_nr_pages, nr_pages_left = nr_pages;
+ struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+ struct list_head *head = &ep->rma_info.remote_reg_list;
+ int i, err = 0;
+ dma_addr_t phys_addr;
+ struct scif_window_iter src_win_iter;
+ size_t contig_bytes = 0;
+
+ might_sleep();
+ list_for_each_entry_from(window, head, list) {
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min_t(int,
+ (end_offset - loop_offset) >> PAGE_SHIFT,
+ nr_pages_left);
+ scif_init_window_iter(window, &src_win_iter);
+ for (i = 0; i < loop_nr_pages; i++) {
+ phys_addr = scif_off_to_dma_addr(window, loop_offset,
+ &contig_bytes,
+ &src_win_iter);
+ phys_addr = scif_get_phys(phys_addr, ep);
+ err = remap_pfn_range(vma,
+ vma->vm_start +
+ loop_offset - offset,
+ phys_addr >> PAGE_SHIFT,
+ PAGE_SIZE,
+ vma->vm_page_prot);
+ if (err)
+ goto error;
+ loop_offset += PAGE_SIZE;
+ }
+ nr_pages_left -= loop_nr_pages;
+ if (!nr_pages_left)
+ break;
+ }
+ /*
+ * No more failures expected. Bump up the ref count for all
+ * the windows. Another traversal from start_window required
+ * for handling errors encountered across windows during
+ * remap_pfn_range(..).
+ */
+ loop_offset = offset;
+ nr_pages_left = nr_pages;
+ window = start_window;
+ head = &ep->rma_info.remote_reg_list;
+ list_for_each_entry_from(window, head, list) {
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min_t(int,
+ (end_offset - loop_offset) >> PAGE_SHIFT,
+ nr_pages_left);
+ scif_get_window(window, loop_nr_pages);
+ nr_pages_left -= loop_nr_pages;
+ loop_offset += (loop_nr_pages << PAGE_SHIFT);
+ if (!nr_pages_left)
+ break;
+ }
+error:
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+
+/*
+ * scif_rma_list_munmap:
+ *
+ * Traverse the remote registration list starting from window:
+ * 1) Decrement ref count.
+ * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
+ * RMA lock must be held.
+ */
+static void scif_rma_list_munmap(struct scif_window *start_window,
+ s64 offset, int nr_pages)
+{
+ struct scifmsg msg;
+ s64 loop_offset = offset, end_offset;
+ int loop_nr_pages, nr_pages_left = nr_pages;
+ struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+ struct list_head *head = &ep->rma_info.remote_reg_list;
+ struct scif_window *window = start_window, *_window;
+
+ msg.uop = SCIF_MUNMAP;
+ msg.src = ep->port;
+ loop_offset = offset;
+ nr_pages_left = nr_pages;
+ list_for_each_entry_safe_from(window, _window, head, list) {
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min_t(int,
+ (end_offset - loop_offset) >> PAGE_SHIFT,
+ nr_pages_left);
+ scif_put_window(window, loop_nr_pages);
+ if (!window->ref_count) {
+ struct scif_dev *rdev = ep->remote_dev;
+
+ scif_drain_dma_intr(rdev->sdev,
+ ep->rma_info.dma_chan);
+ /* Inform the peer about this munmap */
+ msg.payload[0] = window->peer_window;
+ /* No error handling for Notification messages. */
+ scif_nodeqp_send(ep->remote_dev, &msg);
+ list_del(&window->list);
+ /* Destroy this window from the peer's registered AS */
+ scif_destroy_remote_window(window);
+ }
+ nr_pages_left -= loop_nr_pages;
+ loop_offset += (loop_nr_pages << PAGE_SHIFT);
+ if (!nr_pages_left)
+ break;
+ }
+}
+
+/*
+ * The private data field of each VMA used to mmap a remote window
+ * points to an instance of struct vma_pvt
+ */
+struct vma_pvt {
+ struct scif_endpt *ep; /* End point for remote window */
+ s64 offset; /* offset within remote window */
+ bool valid_offset; /* offset is valid only if the original
+ * mmap request was for a single page
+ * else the offset within the vma is
+ * the correct offset
+ */
+ struct kref ref;
+};
+
+static void vma_pvt_release(struct kref *ref)
+{
+ struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
+
+ kfree(vmapvt);
+}
+
+/**
+ * scif_vma_open - VMA open driver callback
+ * @vma: VMM memory area.
+ * The open method is called by the kernel to allow the subsystem implementing
+ * the VMA to initialize the area. This method is invoked any time a new
+ * reference to the VMA is made (when a process forks, for example).
+ * The one exception happens when the VMA is first created by mmap;
+ * in this case, the driver's mmap method is called instead.
+ * This function is also invoked when an existing VMA is split by the kernel
+ * due to a call to munmap on a subset of the VMA resulting in two VMAs.
+ * The kernel invokes this function only on one of the two VMAs.
+ */
+static void scif_vma_open(struct vm_area_struct *vma)
+{
+ struct vma_pvt *vmapvt = vma->vm_private_data;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
+ vma->vm_start, vma->vm_end);
+ scif_insert_vma(vmapvt->ep, vma);
+ kref_get(&vmapvt->ref);
+}
+
+/**
+ * scif_munmap - VMA close driver callback.
+ * @vma: VMM memory area.
+ * When an area is destroyed, the kernel calls its close operation.
+ * Note that there's no usage count associated with VMA's; the area
+ * is opened and closed exactly once by each process that uses it.
+ */
+static void scif_munmap(struct vm_area_struct *vma)
+{
+ struct scif_endpt *ep;
+ struct vma_pvt *vmapvt = vma->vm_private_data;
+ int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ s64 offset;
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ int err;
+
+ might_sleep();
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
+ vma->vm_start, vma->vm_end);
+ ep = vmapvt->ep;
+ offset = vmapvt->valid_offset ? vmapvt->offset :
+ (vma->vm_pgoff) << PAGE_SHIFT;
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
+ ep, nr_pages, offset);
+ req.out_window = &window;
+ req.offset = offset;
+ req.nr_bytes = vma->vm_end - vma->vm_start;
+ req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+ req.type = SCIF_WINDOW_PARTIAL;
+ req.head = &ep->rma_info.remote_reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+
+ err = scif_query_window(&req);
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ else
+ scif_rma_list_munmap(window, offset, nr_pages);
+
+ mutex_unlock(&ep->rma_info.rma_lock);
+ /*
+ * The kernel probably zeroes these out but we still want
+ * to clean up our own mess just in case.
+ */
+ vma->vm_ops = NULL;
+ vma->vm_private_data = NULL;
+ kref_put(&vmapvt->ref, vma_pvt_release);
+ scif_delete_vma(ep, vma);
+}
+
+static const struct vm_operations_struct scif_vm_ops = {
+ .open = scif_vma_open,
+ .close = scif_munmap,
+};
+
+/**
+ * scif_mmap - Map pages in virtual address space to a remote window.
+ * @vma: VMM memory area.
+ * @epd: endpoint descriptor
+ *
+ * Return: Upon successful completion, scif_mmap() returns zero
+ * else an apt error is returned as documented in scif.h
+ */
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
+{
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
+ int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ int err;
+ struct vma_pvt *vmapvt;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
+ ep, start_offset, nr_pages);
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ might_sleep();
+
+ err = scif_insert_vma(ep, vma);
+ if (err)
+ return err;
+
+ vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
+ if (!vmapvt) {
+ scif_delete_vma(ep, vma);
+ return -ENOMEM;
+ }
+
+ vmapvt->ep = ep;
+ kref_init(&vmapvt->ref);
+
+ req.out_window = &window;
+ req.offset = start_offset;
+ req.nr_bytes = vma->vm_end - vma->vm_start;
+ req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+ req.type = SCIF_WINDOW_PARTIAL;
+ req.head = &ep->rma_info.remote_reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unlock;
+ }
+
+ /* Default prot for loopback */
+ if (!scifdev_self(ep->remote_dev))
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+ /*
+ * VM_DONTCOPY - Do not copy this vma on fork
+ * VM_DONTEXPAND - Cannot expand with mremap()
+ * VM_RESERVED - Count as reserved_vm like IO
+ * VM_PFNMAP - Page-ranges managed without "struct page"
+ * VM_IO - Memory mapped I/O or similar
+ *
+ * We do not want to copy this VMA automatically on a fork(),
+ * expand this VMA due to mremap() or swap out these pages since
+ * the VMA is actually backed by physical pages in the remote
+ * node's physical memory and not via a struct page.
+ */
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+
+ if (!scifdev_self(ep->remote_dev))
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+ /* Map this range of windows */
+ err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unlock;
+ }
+ /* Set up the driver call back */
+ vma->vm_ops = &scif_vm_ops;
+ vma->vm_private_data = vmapvt;
+error_unlock:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (err) {
+ kfree(vmapvt);
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ scif_delete_vma(ep, vma);
+ }
+ return err;
+}
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
index 9b4c5382d6a7..79f26a02a1cb 100644
--- a/drivers/misc/mic/scif/scif_nm.c
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -34,6 +34,7 @@ static void scif_invalidate_ep(int node)
list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
ep = list_entry(pos, struct scif_endpt, list);
if (ep->remote_dev->node == node) {
+ scif_unmap_all_windows(ep);
spin_lock(&ep->lock);
scif_cleanup_ep_qp(ep);
spin_unlock(&ep->lock);
@@ -50,6 +51,7 @@ static void scif_invalidate_ep(int node)
wake_up_interruptible(&ep->sendwq);
wake_up_interruptible(&ep->recvwq);
spin_unlock(&ep->lock);
+ scif_unmap_all_windows(ep);
}
}
mutex_unlock(&scif_info.connlock);
@@ -61,8 +63,8 @@ void scif_free_qp(struct scif_dev *scifdev)
if (!qp)
return;
- scif_free_coherent((void *)qp->inbound_q.rb_base,
- qp->local_buf, scifdev, qp->inbound_q.size);
+ scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+ kfree(qp->inbound_q.rb_base);
scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
kfree(scifdev->qpairs);
scifdev->qpairs = NULL;
@@ -125,8 +127,12 @@ void scif_cleanup_scifdev(struct scif_dev *dev)
}
scif_destroy_intr_wq(dev);
}
+ flush_work(&scif_info.misc_work);
scif_destroy_p2p(dev);
scif_invalidate_ep(dev->node);
+ scif_zap_mmaps(dev->node);
+ scif_cleanup_rma_for_zombies(dev->node);
+ flush_work(&scif_info.misc_work);
scif_send_acks(dev);
if (!dev->node && scif_info.card_initiated_exit) {
/*
@@ -147,14 +153,8 @@ void scif_cleanup_scifdev(struct scif_dev *dev)
void scif_handle_remove_node(int node)
{
struct scif_dev *scifdev = &scif_dev[node];
- struct scif_peer_dev *spdev;
-
- rcu_read_lock();
- spdev = rcu_dereference(scifdev->spdev);
- rcu_read_unlock();
- if (spdev)
- scif_peer_unregister_device(spdev);
- else
+
+ if (scif_peer_unregister_device(scifdev))
scif_send_acks(scifdev);
}
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
index 6dfdae3452d6..c66ca1a5814e 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.c
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -105,18 +105,22 @@
int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
int local_size, struct scif_dev *scifdev)
{
- void *local_q = NULL;
+ void *local_q = qp->inbound_q.rb_base;
int err = 0;
u32 tmp_rd = 0;
spin_lock_init(&qp->send_lock);
spin_lock_init(&qp->recv_lock);
- local_q = kzalloc(local_size, GFP_KERNEL);
+ /* Allocate rb only if not already allocated */
if (!local_q) {
- err = -ENOMEM;
- return err;
+ local_q = kzalloc(local_size, GFP_KERNEL);
+ if (!local_q) {
+ err = -ENOMEM;
+ return err;
+ }
}
+
err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
if (err)
goto kfree;
@@ -260,6 +264,11 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev,
r_buf,
get_count_order(remote_size));
/*
+ * Because the node QP may already be processing an INIT message, set
+ * the read pointer so the cached read offset isn't lost
+ */
+ qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
+ /*
* resetup the inbound_q now that we know where the
* inbound_read really is.
*/
@@ -426,6 +435,21 @@ free_p2p:
return NULL;
}
+/* Uninitialize and release resources from a p2p mapping */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+ struct scif_p2p_info *p2p)
+{
+ struct scif_hw_dev *sdev = scifdev->sdev;
+
+ dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+ p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+ dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+ p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+ scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+ kfree(p2p);
+}
+
/**
* scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
* @dst: Destination node
@@ -468,8 +492,10 @@ static void scif_node_connect(struct scif_dev *scifdev, int dst)
if (!p2p_ij)
return;
p2p_ji = scif_init_p2p_info(dev_j, dev_i);
- if (!p2p_ji)
+ if (!p2p_ji) {
+ scif_deinit_p2p_info(dev_i, p2p_ij);
return;
+ }
list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
@@ -529,27 +555,6 @@ static void scif_p2p_setup(void)
}
}
-void scif_qp_response_ack(struct work_struct *work)
-{
- struct scif_dev *scifdev = container_of(work, struct scif_dev,
- init_msg_work);
- struct scif_peer_dev *spdev;
-
- /* Drop the INIT message if it has already been received */
- if (_scifdev_alive(scifdev))
- return;
-
- spdev = scif_peer_register_device(scifdev);
- if (IS_ERR(spdev))
- return;
-
- if (scif_is_mgmt_node()) {
- mutex_lock(&scif_info.conflock);
- scif_p2p_setup();
- mutex_unlock(&scif_info.conflock);
- }
-}
-
static char *message_types[] = {"BAD",
"INIT",
"EXIT",
@@ -568,7 +573,29 @@ static char *message_types[] = {"BAD",
"DISCNT_ACK",
"CLIENT_SENT",
"CLIENT_RCVD",
- "SCIF_GET_NODE_INFO"};
+ "SCIF_GET_NODE_INFO",
+ "REGISTER",
+ "REGISTER_ACK",
+ "REGISTER_NACK",
+ "UNREGISTER",
+ "UNREGISTER_ACK",
+ "UNREGISTER_NACK",
+ "ALLOC_REQ",
+ "ALLOC_GNT",
+ "ALLOC_REJ",
+ "FREE_PHYS",
+ "FREE_VIRT",
+ "MUNMAP",
+ "MARK",
+ "MARK_ACK",
+ "MARK_NACK",
+ "WAIT",
+ "WAIT_ACK",
+ "WAIT_NACK",
+ "SIGNAL_LOCAL",
+ "SIGNAL_REMOTE",
+ "SIG_ACK",
+ "SIG_NACK"};
static void
scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
@@ -662,10 +689,16 @@ int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
*
* Work queue handler for servicing miscellaneous SCIF tasks.
* Examples include:
- * 1) Cleanup of zombie endpoints.
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ * created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
*/
void scif_misc_handler(struct work_struct *work)
{
+ scif_rma_handle_remote_fences();
+ scif_rma_destroy_windows();
+ scif_rma_destroy_tcw_invalid();
scif_cleanup_zombie_epd();
}
@@ -682,13 +715,14 @@ scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
* address to complete initializing the inbound_q.
*/
flush_delayed_work(&scifdev->qp_dwork);
- /*
- * Delegate the peer device registration to a workqueue, otherwise if
- * SCIF client probe (called during peer device registration) calls
- * scif_connect(..), it will block the message processing thread causing
- * a deadlock.
- */
- schedule_work(&scifdev->init_msg_work);
+
+ scif_peer_register_device(scifdev);
+
+ if (scif_is_mgmt_node()) {
+ mutex_lock(&scif_info.conflock);
+ scif_p2p_setup();
+ mutex_unlock(&scif_info.conflock);
+ }
}
/**
@@ -838,13 +872,13 @@ void scif_poll_qp_state(struct work_struct *work)
msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
return;
}
- scif_peer_register_device(peerdev);
return;
timeout:
dev_err(&peerdev->sdev->dev,
"%s %d remote node %d offline, state = 0x%x\n",
__func__, __LINE__, peerdev->node, qp->qp_state);
qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+ scif_peer_unregister_device(peerdev);
scif_cleanup_scifdev(peerdev);
}
@@ -894,6 +928,9 @@ scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
goto local_error;
peerdev->rdb = msg->payload[2];
qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+
+ scif_peer_register_device(peerdev);
+
schedule_delayed_work(&peerdev->p2p_dwork, 0);
return;
local_error:
@@ -1007,6 +1044,27 @@ static void (*scif_intr_func[SCIF_MAX_MSG + 1])
scif_clientsend, /* SCIF_CLIENT_SENT */
scif_clientrcvd, /* SCIF_CLIENT_RCVD */
scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+ scif_recv_reg, /* SCIF_REGISTER */
+ scif_recv_reg_ack, /* SCIF_REGISTER_ACK */
+ scif_recv_reg_nack, /* SCIF_REGISTER_NACK */
+ scif_recv_unreg, /* SCIF_UNREGISTER */
+ scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */
+ scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */
+ scif_alloc_req, /* SCIF_ALLOC_REQ */
+ scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */
+ scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */
+ scif_free_virt, /* SCIF_FREE_VIRT */
+ scif_recv_munmap, /* SCIF_MUNMAP */
+ scif_recv_mark, /* SCIF_MARK */
+ scif_recv_mark_resp, /* SCIF_MARK_ACK */
+ scif_recv_mark_resp, /* SCIF_MARK_NACK */
+ scif_recv_wait, /* SCIF_WAIT */
+ scif_recv_wait_resp, /* SCIF_WAIT_ACK */
+ scif_recv_wait_resp, /* SCIF_WAIT_NACK */
+ scif_recv_sig_local, /* SCIF_SIG_LOCAL */
+ scif_recv_sig_remote, /* SCIF_SIG_REMOTE */
+ scif_recv_sig_resp, /* SCIF_SIG_ACK */
+ scif_recv_sig_resp, /* SCIF_SIG_NACK */
};
/**
@@ -1169,7 +1227,6 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev)
int err = 0;
void *local_q;
struct scif_qp *qp;
- struct scif_peer_dev *spdev;
err = scif_setup_intr_wq(scifdev);
if (err)
@@ -1216,15 +1273,11 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev)
&qp->local_write,
local_q, get_count_order(SCIF_NODE_QP_SIZE));
scif_info.nodeid = scifdev->node;
- spdev = scif_peer_register_device(scifdev);
- if (IS_ERR(spdev)) {
- err = PTR_ERR(spdev);
- goto free_local_q;
- }
+
+ scif_peer_register_device(scifdev);
+
scif_info.loopb_dev = scifdev;
return err;
-free_local_q:
- kfree(local_q);
free_qpairs:
kfree(scifdev->qpairs);
destroy_loopb_wq:
@@ -1243,13 +1296,7 @@ exit:
*/
int scif_destroy_loopback_qp(struct scif_dev *scifdev)
{
- struct scif_peer_dev *spdev;
-
- rcu_read_lock();
- spdev = rcu_dereference(scifdev->spdev);
- rcu_read_unlock();
- if (spdev)
- scif_peer_unregister_device(spdev);
+ scif_peer_unregister_device(scifdev);
destroy_workqueue(scif_info.loopb_wq);
scif_destroy_intr_wq(scifdev);
kfree(scifdev->qpairs->outbound_q.rb_base);
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
index 6c0ed6783479..95896273138e 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.h
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -74,7 +74,28 @@
#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
-#define SCIF_MAX_MSG SCIF_GET_NODE_INFO
+#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
+#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */
+#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */
+#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
+#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */
+#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
+#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
+#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
+#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
+#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
+#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
+#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
+#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
+#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
+#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
+#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
+#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
+#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
+#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
+#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
+#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
+#define SCIF_MAX_MSG SCIF_SIG_NACK
/*
* struct scifmsg - Node QP message format
@@ -92,6 +113,24 @@ struct scifmsg {
} __packed;
/*
+ * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
+ * the remote note to allocate memory
+ *
+ * phys_addr: Physical address of the buffer
+ * vaddr: Virtual address of the buffer
+ * size: Size of the buffer
+ * state: Current state
+ * allocwq: wait queue for status
+ */
+struct scif_allocmsg {
+ dma_addr_t phys_addr;
+ unsigned long vaddr;
+ size_t size;
+ enum scif_msg_state state;
+ wait_queue_head_t allocwq;
+};
+
+/*
* struct scif_qp - Node Queue Pair
*
* Interesting structure -- a little difficult because we can only
@@ -158,7 +197,6 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev,
int scif_setup_loopback_qp(struct scif_dev *scifdev);
int scif_destroy_loopback_qp(struct scif_dev *scifdev);
void scif_poll_qp_state(struct work_struct *work);
-void scif_qp_response_ack(struct work_struct *work);
void scif_destroy_p2p(struct scif_dev *scifdev);
void scif_send_exit(struct scif_dev *scifdev);
static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
index 589ae9ad2501..6ffa3bdbd45b 100644
--- a/drivers/misc/mic/scif/scif_peer_bus.c
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -24,93 +24,152 @@ dev_to_scif_peer(struct device *dev)
return container_of(dev, struct scif_peer_dev, dev);
}
-static inline struct scif_peer_driver *
-drv_to_scif_peer(struct device_driver *drv)
-{
- return container_of(drv, struct scif_peer_driver, driver);
-}
+struct bus_type scif_peer_bus = {
+ .name = "scif_peer_bus",
+};
-static int scif_peer_dev_match(struct device *dv, struct device_driver *dr)
+static void scif_peer_release_dev(struct device *d)
{
- return !strncmp(dev_name(dv), dr->name, 4);
+ struct scif_peer_dev *sdev = dev_to_scif_peer(d);
+ struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+ scif_cleanup_scifdev(scifdev);
+ kfree(sdev);
}
-static int scif_peer_dev_probe(struct device *d)
+static int scif_peer_initialize_device(struct scif_dev *scifdev)
{
- struct scif_peer_dev *dev = dev_to_scif_peer(d);
- struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver);
+ struct scif_peer_dev *spdev;
+ int ret;
- return drv->probe(dev);
-}
+ spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
+ if (!spdev) {
+ ret = -ENOMEM;
+ goto err;
+ }
-static int scif_peer_dev_remove(struct device *d)
-{
- struct scif_peer_dev *dev = dev_to_scif_peer(d);
- struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver);
+ spdev->dev.parent = scifdev->sdev->dev.parent;
+ spdev->dev.release = scif_peer_release_dev;
+ spdev->dnode = scifdev->node;
+ spdev->dev.bus = &scif_peer_bus;
+ dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
+
+ device_initialize(&spdev->dev);
+ get_device(&spdev->dev);
+ rcu_assign_pointer(scifdev->spdev, spdev);
- drv->remove(dev);
+ mutex_lock(&scif_info.conflock);
+ scif_info.total++;
+ scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
+ mutex_unlock(&scif_info.conflock);
return 0;
+err:
+ dev_err(&scifdev->sdev->dev,
+ "dnode %d: initialize_device rc %d\n", scifdev->node, ret);
+ return ret;
}
-static struct bus_type scif_peer_bus = {
- .name = "scif_peer_bus",
- .match = scif_peer_dev_match,
- .probe = scif_peer_dev_probe,
- .remove = scif_peer_dev_remove,
-};
-
-int scif_peer_register_driver(struct scif_peer_driver *driver)
+static int scif_peer_add_device(struct scif_dev *scifdev)
{
- driver->driver.bus = &scif_peer_bus;
- return driver_register(&driver->driver);
+ struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
+ char pool_name[16];
+ int ret;
+
+ ret = device_add(&spdev->dev);
+ put_device(&spdev->dev);
+ if (ret) {
+ dev_err(&scifdev->sdev->dev,
+ "dnode %d: peer device_add failed\n", scifdev->node);
+ goto put_spdev;
+ }
+
+ scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
+ scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
+ sizeof(struct scif_status), 1,
+ 0);
+ if (!scifdev->signal_pool) {
+ dev_err(&scifdev->sdev->dev,
+ "dnode %d: dmam_pool_create failed\n", scifdev->node);
+ ret = -ENOMEM;
+ goto del_spdev;
+ }
+ dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
+ return 0;
+del_spdev:
+ device_del(&spdev->dev);
+put_spdev:
+ RCU_INIT_POINTER(scifdev->spdev, NULL);
+ synchronize_rcu();
+ put_device(&spdev->dev);
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.total--;
+ mutex_unlock(&scif_info.conflock);
+ return ret;
}
-void scif_peer_unregister_driver(struct scif_peer_driver *driver)
+void scif_add_peer_device(struct work_struct *work)
{
- driver_unregister(&driver->driver);
+ struct scif_dev *scifdev = container_of(work, struct scif_dev,
+ peer_add_work);
+
+ scif_peer_add_device(scifdev);
}
-static void scif_peer_release_dev(struct device *d)
+/*
+ * Peer device registration is split into a device_initialize and a device_add.
+ * The reason for doing this is as follows: First, peer device registration
+ * itself cannot be done in the message processing thread and must be delegated
+ * to another workqueue, otherwise if SCIF client probe, called during peer
+ * device registration, calls scif_connect(..), it will block the message
+ * processing thread causing a deadlock. Next, device_initialize is done in the
+ * "top-half" message processing thread and device_add in the "bottom-half"
+ * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
+ * concurrently with SCIF_INIT message processing is unable to get a reference
+ * on the peer device, thereby failing the connect request.
+ */
+void scif_peer_register_device(struct scif_dev *scifdev)
{
- struct scif_peer_dev *sdev = dev_to_scif_peer(d);
- struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+ int ret;
- scif_cleanup_scifdev(scifdev);
- kfree(sdev);
+ mutex_lock(&scifdev->lock);
+ ret = scif_peer_initialize_device(scifdev);
+ if (ret)
+ goto exit;
+ schedule_work(&scifdev->peer_add_work);
+exit:
+ mutex_unlock(&scifdev->lock);
}
-struct scif_peer_dev *
-scif_peer_register_device(struct scif_dev *scifdev)
+int scif_peer_unregister_device(struct scif_dev *scifdev)
{
- int ret;
struct scif_peer_dev *spdev;
- spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
- if (!spdev)
- return ERR_PTR(-ENOMEM);
-
- spdev->dev.parent = scifdev->sdev->dev.parent;
- spdev->dev.release = scif_peer_release_dev;
- spdev->dnode = scifdev->node;
- spdev->dev.bus = &scif_peer_bus;
+ mutex_lock(&scifdev->lock);
+ /* Flush work to ensure device register is complete */
+ flush_work(&scifdev->peer_add_work);
- dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
/*
- * device_register() causes the bus infrastructure to look for a
- * matching driver.
+ * Continue holding scifdev->lock since theoretically unregister_device
+ * can be called simultaneously from multiple threads
*/
- ret = device_register(&spdev->dev);
- if (ret)
- goto free_spdev;
- return spdev;
-free_spdev:
- kfree(spdev);
- return ERR_PTR(ret);
-}
-
-void scif_peer_unregister_device(struct scif_peer_dev *sdev)
-{
- device_unregister(&sdev->dev);
+ spdev = rcu_dereference(scifdev->spdev);
+ if (!spdev) {
+ mutex_unlock(&scifdev->lock);
+ return -ENODEV;
+ }
+
+ RCU_INIT_POINTER(scifdev->spdev, NULL);
+ synchronize_rcu();
+ mutex_unlock(&scifdev->lock);
+
+ dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
+ device_unregister(&spdev->dev);
+
+ mutex_lock(&scif_info.conflock);
+ scif_info.total--;
+ mutex_unlock(&scif_info.conflock);
+ return 0;
}
int scif_peer_bus_init(void)
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
index 33f0dbb30152..a3b8dd2edaa5 100644
--- a/drivers/misc/mic/scif/scif_peer_bus.h
+++ b/drivers/misc/mic/scif/scif_peer_bus.h
@@ -19,47 +19,13 @@
#include <linux/device.h>
#include <linux/mic_common.h>
-
-/*
- * Peer devices show up as PCIe devices for the mgmt node but not the cards.
- * The mgmt node discovers all the cards on the PCIe bus and informs the other
- * cards about their peers. Upon notification of a peer a node adds a peer
- * device to the peer bus to maintain symmetry in the way devices are
- * discovered across all nodes in the SCIF network.
- */
-/**
- * scif_peer_dev - representation of a peer SCIF device
- * @dev: underlying device
- * @dnode - The destination node which this device will communicate with.
- */
-struct scif_peer_dev {
- struct device dev;
- u8 dnode;
-};
-
-/**
- * scif_peer_driver - operations for a scif_peer I/O driver
- * @driver: underlying device driver (populate name and owner).
- * @id_table: the ids serviced by this driver.
- * @probe: the function to call when a device is found. Returns 0 or -errno.
- * @remove: the function to call when a device is removed.
- */
-struct scif_peer_driver {
- struct device_driver driver;
- const struct scif_peer_dev_id *id_table;
-
- int (*probe)(struct scif_peer_dev *dev);
- void (*remove)(struct scif_peer_dev *dev);
-};
+#include <linux/scif.h>
struct scif_dev;
-int scif_peer_register_driver(struct scif_peer_driver *driver);
-void scif_peer_unregister_driver(struct scif_peer_driver *driver);
-
-struct scif_peer_dev *scif_peer_register_device(struct scif_dev *sdev);
-void scif_peer_unregister_device(struct scif_peer_dev *sdev);
-
+void scif_add_peer_device(struct work_struct *work);
+void scif_peer_register_device(struct scif_dev *sdev);
+int scif_peer_unregister_device(struct scif_dev *scifdev);
int scif_peer_bus_init(void);
void scif_peer_bus_exit(void);
#endif /* _SCIF_PEER_BUS_H */
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
new file mode 100644
index 000000000000..8310b4dbff06
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -0,0 +1,1775 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/dma_remapping.h>
+#include <linux/pagemap.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
+#define SCIF_MAP_ULIMIT 0x40
+
+bool scif_ulimit_check = 1;
+
+/**
+ * scif_rma_ep_init:
+ * @ep: end point
+ *
+ * Initialize RMA per EP data structures.
+ */
+void scif_rma_ep_init(struct scif_endpt *ep)
+{
+ struct scif_endpt_rma_info *rma = &ep->rma_info;
+
+ mutex_init(&rma->rma_lock);
+ init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
+ SCIF_DMA_64BIT_PFN);
+ spin_lock_init(&rma->tc_lock);
+ mutex_init(&rma->mmn_lock);
+ INIT_LIST_HEAD(&rma->reg_list);
+ INIT_LIST_HEAD(&rma->remote_reg_list);
+ atomic_set(&rma->tw_refcount, 0);
+ atomic_set(&rma->tcw_refcount, 0);
+ atomic_set(&rma->tcw_total_pages, 0);
+ atomic_set(&rma->fence_refcount, 0);
+
+ rma->async_list_del = 0;
+ rma->dma_chan = NULL;
+ INIT_LIST_HEAD(&rma->mmn_list);
+ INIT_LIST_HEAD(&rma->vma_list);
+ init_waitqueue_head(&rma->markwq);
+}
+
+/**
+ * scif_rma_ep_can_uninit:
+ * @ep: end point
+ *
+ * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
+ */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep)
+{
+ int ret = 0;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Destroy RMA Info only if both lists are empty */
+ if (list_empty(&ep->rma_info.reg_list) &&
+ list_empty(&ep->rma_info.remote_reg_list) &&
+ list_empty(&ep->rma_info.mmn_list) &&
+ !atomic_read(&ep->rma_info.tw_refcount) &&
+ !atomic_read(&ep->rma_info.tcw_refcount) &&
+ !atomic_read(&ep->rma_info.fence_refcount))
+ ret = 1;
+ mutex_unlock(&ep->rma_info.rma_lock);
+ return ret;
+}
+
+/**
+ * scif_create_pinned_pages:
+ * @nr_pages: number of pages in window
+ * @prot: read/write protection
+ *
+ * Allocate and prepare a set of pinned pages.
+ */
+static struct scif_pinned_pages *
+scif_create_pinned_pages(int nr_pages, int prot)
+{
+ struct scif_pinned_pages *pin;
+
+ might_sleep();
+ pin = scif_zalloc(sizeof(*pin));
+ if (!pin)
+ goto error;
+
+ pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
+ if (!pin->pages)
+ goto error_free_pinned_pages;
+
+ pin->prot = prot;
+ pin->magic = SCIFEP_MAGIC;
+ return pin;
+
+error_free_pinned_pages:
+ scif_free(pin, sizeof(*pin));
+error:
+ return NULL;
+}
+
+/**
+ * scif_destroy_pinned_pages:
+ * @pin: A set of pinned pages.
+ *
+ * Deallocate resources for pinned pages.
+ */
+static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
+{
+ int j;
+ int writeable = pin->prot & SCIF_PROT_WRITE;
+ int kernel = SCIF_MAP_KERNEL & pin->map_flags;
+
+ for (j = 0; j < pin->nr_pages; j++) {
+ if (pin->pages[j] && !kernel) {
+ if (writeable)
+ SetPageDirty(pin->pages[j]);
+ put_page(pin->pages[j]);
+ }
+ }
+
+ scif_free(pin->pages,
+ pin->nr_pages * sizeof(*pin->pages));
+ scif_free(pin, sizeof(*pin));
+ return 0;
+}
+
+/*
+ * scif_create_window:
+ * @ep: end point
+ * @nr_pages: number of pages
+ * @offset: registration offset
+ * @temp: true if a temporary window is being created
+ *
+ * Allocate and prepare a self registration window.
+ */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+ s64 offset, bool temp)
+{
+ struct scif_window *window;
+
+ might_sleep();
+ window = scif_zalloc(sizeof(*window));
+ if (!window)
+ goto error;
+
+ window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+ if (!window->dma_addr)
+ goto error_free_window;
+
+ window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
+ if (!window->num_pages)
+ goto error_free_window;
+
+ window->offset = offset;
+ window->ep = (u64)ep;
+ window->magic = SCIFEP_MAGIC;
+ window->reg_state = OP_IDLE;
+ init_waitqueue_head(&window->regwq);
+ window->unreg_state = OP_IDLE;
+ init_waitqueue_head(&window->unregwq);
+ INIT_LIST_HEAD(&window->list);
+ window->type = SCIF_WINDOW_SELF;
+ window->temp = temp;
+ return window;
+
+error_free_window:
+ scif_free(window->dma_addr,
+ nr_pages * sizeof(*window->dma_addr));
+ scif_free(window, sizeof(*window));
+error:
+ return NULL;
+}
+
+/**
+ * scif_destroy_incomplete_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+static void scif_destroy_incomplete_window(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ int err;
+ int nr_pages = window->nr_pages;
+ struct scif_allocmsg *alloc = &window->alloc_handle;
+ struct scifmsg msg;
+
+retry:
+ /* Wait for a SCIF_ALLOC_GNT/REJ message */
+ err = wait_event_timeout(alloc->allocwq,
+ alloc->state != OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ if (alloc->state == OP_COMPLETED) {
+ msg.uop = SCIF_FREE_VIRT;
+ msg.src = ep->port;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = window->alloc_handle.vaddr;
+ msg.payload[2] = (u64)window;
+ msg.payload[3] = SCIF_REGISTER;
+ _scif_nodeqp_send(ep->remote_dev, &msg);
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+
+ scif_free_window_offset(ep, window, window->offset);
+ scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+ scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+ scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_unmap_window:
+ * @remote_dev: SCIF remote device
+ * @window: registration window
+ *
+ * Delete any DMA mappings created for a registered self window
+ */
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+ int j;
+
+ if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
+ if (window->st) {
+ dma_unmap_sg(&remote_dev->sdev->dev,
+ window->st->sgl, window->st->nents,
+ DMA_BIDIRECTIONAL);
+ sg_free_table(window->st);
+ kfree(window->st);
+ window->st = NULL;
+ }
+ } else {
+ for (j = 0; j < window->nr_contig_chunks; j++) {
+ if (window->dma_addr[j]) {
+ scif_unmap_single(window->dma_addr[j],
+ remote_dev,
+ window->num_pages[j] <<
+ PAGE_SHIFT);
+ window->dma_addr[j] = 0x0;
+ }
+ }
+ }
+}
+
+static inline struct mm_struct *__scif_acquire_mm(void)
+{
+ if (scif_ulimit_check)
+ return get_task_mm(current);
+ return NULL;
+}
+
+static inline void __scif_release_mm(struct mm_struct *mm)
+{
+ if (mm)
+ mmput(mm);
+}
+
+static inline int
+__scif_dec_pinned_vm_lock(struct mm_struct *mm,
+ int nr_pages, bool try_lock)
+{
+ if (!mm || !nr_pages || !scif_ulimit_check)
+ return 0;
+ if (try_lock) {
+ if (!down_write_trylock(&mm->mmap_sem)) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err\n", __func__, __LINE__);
+ return -1;
+ }
+ } else {
+ down_write(&mm->mmap_sem);
+ }
+ mm->pinned_vm -= nr_pages;
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+
+static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
+ int nr_pages)
+{
+ unsigned long locked, lock_limit;
+
+ if (!mm || !nr_pages || !scif_ulimit_check)
+ return 0;
+
+ locked = nr_pages;
+ locked += mm->pinned_vm;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ dev_err(scif_info.mdev.this_device,
+ "locked(%lu) > lock_limit(%lu)\n",
+ locked, lock_limit);
+ return -ENOMEM;
+ }
+ mm->pinned_vm = locked;
+ return 0;
+}
+
+/**
+ * scif_destroy_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window.
+ */
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
+{
+ int j;
+ struct scif_pinned_pages *pinned_pages = window->pinned_pages;
+ int nr_pages = window->nr_pages;
+
+ might_sleep();
+ if (!window->temp && window->mm) {
+ __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
+ __scif_release_mm(window->mm);
+ window->mm = NULL;
+ }
+
+ scif_free_window_offset(ep, window, window->offset);
+ scif_unmap_window(ep->remote_dev, window);
+ /*
+ * Decrement references for this set of pinned pages from
+ * this window.
+ */
+ j = atomic_sub_return(1, &pinned_pages->ref_count);
+ if (j < 0)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d incorrect ref count %d\n",
+ __func__, __LINE__, j);
+ /*
+ * If the ref count for pinned_pages is zero then someone
+ * has already called scif_unpin_pages() for it and we should
+ * destroy the page cache.
+ */
+ if (!j)
+ scif_destroy_pinned_pages(window->pinned_pages);
+ scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+ scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+ window->magic = 0;
+ scif_free(window, sizeof(*window));
+ return 0;
+}
+
+/**
+ * scif_create_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Allocate and prepare lookup entries for the remote
+ * end to copy over the physical addresses.
+ * Returns 0 on success and appropriate errno on failure.
+ */
+static int scif_create_remote_lookup(struct scif_dev *remote_dev,
+ struct scif_window *window)
+{
+ int i, j, err = 0;
+ int nr_pages = window->nr_pages;
+ bool vmalloc_dma_phys, vmalloc_num_pages;
+
+ might_sleep();
+ /* Map window */
+ err = scif_map_single(&window->mapped_offset,
+ window, remote_dev, sizeof(*window));
+ if (err)
+ goto error_window;
+
+ /* Compute the number of lookup entries. 21 == 2MB Shift */
+ window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
+ ((2) * 1024 * 1024)) >> 21;
+
+ window->dma_addr_lookup.lookup =
+ scif_alloc_coherent(&window->dma_addr_lookup.offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*window->dma_addr_lookup.lookup),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!window->dma_addr_lookup.lookup) {
+ err = -ENOMEM;
+ goto error_window;
+ }
+
+ window->num_pages_lookup.lookup =
+ scif_alloc_coherent(&window->num_pages_lookup.offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*window->num_pages_lookup.lookup),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!window->num_pages_lookup.lookup) {
+ err = -ENOMEM;
+ goto error_window;
+ }
+
+ vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
+ vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
+
+ /* Now map each of the pages containing physical addresses */
+ for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
+ err = scif_map_page(&window->dma_addr_lookup.lookup[j],
+ vmalloc_dma_phys ?
+ vmalloc_to_page(&window->dma_addr[i]) :
+ virt_to_page(&window->dma_addr[i]),
+ remote_dev);
+ if (err)
+ goto error_window;
+ err = scif_map_page(&window->num_pages_lookup.lookup[j],
+ vmalloc_dma_phys ?
+ vmalloc_to_page(&window->num_pages[i]) :
+ virt_to_page(&window->num_pages[i]),
+ remote_dev);
+ if (err)
+ goto error_window;
+ }
+ return 0;
+error_window:
+ return err;
+}
+
+/**
+ * scif_destroy_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Destroy lookup entries used for the remote
+ * end to copy over the physical addresses.
+ */
+static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
+ struct scif_window *window)
+{
+ int i, j;
+
+ if (window->nr_lookup) {
+ struct scif_rma_lookup *lup = &window->dma_addr_lookup;
+ struct scif_rma_lookup *npup = &window->num_pages_lookup;
+
+ for (i = 0, j = 0; i < window->nr_pages;
+ i += SCIF_NR_ADDR_IN_PAGE, j++) {
+ if (lup->lookup && lup->lookup[j])
+ scif_unmap_single(lup->lookup[j],
+ remote_dev,
+ PAGE_SIZE);
+ if (npup->lookup && npup->lookup[j])
+ scif_unmap_single(npup->lookup[j],
+ remote_dev,
+ PAGE_SIZE);
+ }
+ if (lup->lookup)
+ scif_free_coherent(lup->lookup, lup->offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*lup->lookup));
+ if (npup->lookup)
+ scif_free_coherent(npup->lookup, npup->offset,
+ remote_dev, window->nr_lookup *
+ sizeof(*npup->lookup));
+ if (window->mapped_offset)
+ scif_unmap_single(window->mapped_offset,
+ remote_dev, sizeof(*window));
+ window->nr_lookup = 0;
+ }
+}
+
+/**
+ * scif_create_remote_window:
+ * @ep: end point
+ * @nr_pages: number of pages in window
+ *
+ * Allocate and prepare a remote registration window.
+ */
+static struct scif_window *
+scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
+{
+ struct scif_window *window;
+
+ might_sleep();
+ window = scif_zalloc(sizeof(*window));
+ if (!window)
+ goto error_ret;
+
+ window->magic = SCIFEP_MAGIC;
+ window->nr_pages = nr_pages;
+
+ window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+ if (!window->dma_addr)
+ goto error_window;
+
+ window->num_pages = scif_zalloc(nr_pages *
+ sizeof(*window->num_pages));
+ if (!window->num_pages)
+ goto error_window;
+
+ if (scif_create_remote_lookup(scifdev, window))
+ goto error_window;
+
+ window->type = SCIF_WINDOW_PEER;
+ window->unreg_state = OP_IDLE;
+ INIT_LIST_HEAD(&window->list);
+ return window;
+error_window:
+ scif_destroy_remote_window(window);
+error_ret:
+ return NULL;
+}
+
+/**
+ * scif_destroy_remote_window:
+ * @ep: end point
+ * @window: remote registration window
+ *
+ * Deallocate resources for remote window.
+ */
+void
+scif_destroy_remote_window(struct scif_window *window)
+{
+ scif_free(window->dma_addr, window->nr_pages *
+ sizeof(*window->dma_addr));
+ scif_free(window->num_pages, window->nr_pages *
+ sizeof(*window->num_pages));
+ window->magic = 0;
+ scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_iommu_map: create DMA mappings if the IOMMU is enabled
+ * @remote_dev: SCIF remote device
+ * @window: remote registration window
+ *
+ * Map the physical pages using dma_map_sg(..) and then detect the number
+ * of contiguous DMA mappings allocated
+ */
+static int scif_iommu_map(struct scif_dev *remote_dev,
+ struct scif_window *window)
+{
+ struct scatterlist *sg;
+ int i, err;
+ scif_pinned_pages_t pin = window->pinned_pages;
+
+ window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
+ if (!window->st)
+ return -ENOMEM;
+
+ err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
+ if (err)
+ return err;
+
+ for_each_sg(window->st->sgl, sg, window->st->nents, i)
+ sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
+
+ err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
+ window->st->nents, DMA_BIDIRECTIONAL);
+ if (!err)
+ return -ENOMEM;
+ /* Detect contiguous ranges of DMA mappings */
+ sg = window->st->sgl;
+ for (i = 0; sg; i++) {
+ dma_addr_t last_da;
+
+ window->dma_addr[i] = sg_dma_address(sg);
+ window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
+ last_da = sg_dma_address(sg) + sg_dma_len(sg);
+ while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
+ window->num_pages[i] +=
+ (sg_dma_len(sg) >> PAGE_SHIFT);
+ last_da = window->dma_addr[i] +
+ sg_dma_len(sg);
+ }
+ window->nr_contig_chunks++;
+ }
+ return 0;
+}
+
+/**
+ * scif_map_window:
+ * @remote_dev: SCIF remote device
+ * @window: self registration window
+ *
+ * Map pages of a window into the aperture/PCI.
+ * Also determine addresses required for DMA.
+ */
+int
+scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+ int i, j, k, err = 0, nr_contig_pages;
+ scif_pinned_pages_t pin;
+ phys_addr_t phys_prev, phys_curr;
+
+ might_sleep();
+
+ pin = window->pinned_pages;
+
+ if (intel_iommu_enabled && !scifdev_self(remote_dev))
+ return scif_iommu_map(remote_dev, window);
+
+ for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
+ phys_prev = page_to_phys(pin->pages[i]);
+ nr_contig_pages = 1;
+
+ /* Detect physically contiguous chunks */
+ for (k = i + 1; k < window->nr_pages; k++) {
+ phys_curr = page_to_phys(pin->pages[k]);
+ if (phys_curr != (phys_prev + PAGE_SIZE))
+ break;
+ phys_prev = phys_curr;
+ nr_contig_pages++;
+ }
+ window->num_pages[j] = nr_contig_pages;
+ window->nr_contig_chunks++;
+ if (scif_is_mgmt_node()) {
+ /*
+ * Management node has to deal with SMPT on X100 and
+ * hence the DMA mapping is required
+ */
+ err = scif_map_single(&window->dma_addr[j],
+ phys_to_virt(page_to_phys(
+ pin->pages[i])),
+ remote_dev,
+ nr_contig_pages << PAGE_SHIFT);
+ if (err)
+ return err;
+ } else {
+ window->dma_addr[j] = page_to_phys(pin->pages[i]);
+ }
+ }
+ return err;
+}
+
+/**
+ * scif_send_scif_unregister:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_UNREGISTER message.
+ */
+static int scif_send_scif_unregister(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ struct scifmsg msg;
+
+ msg.uop = SCIF_UNREGISTER;
+ msg.src = ep->port;
+ msg.payload[0] = window->alloc_handle.vaddr;
+ msg.payload[1] = (u64)window;
+ return scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_unregister_window:
+ * @window: self registration window
+ *
+ * Send an unregistration request and wait for a response.
+ */
+int scif_unregister_window(struct scif_window *window)
+{
+ int err = 0;
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+ bool send_msg = false;
+
+ might_sleep();
+ switch (window->unreg_state) {
+ case OP_IDLE:
+ {
+ window->unreg_state = OP_IN_PROGRESS;
+ send_msg = true;
+ /* fall through */
+ }
+ case OP_IN_PROGRESS:
+ {
+ scif_get_window(window, 1);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (send_msg) {
+ err = scif_send_scif_unregister(ep, window);
+ if (err) {
+ window->unreg_state = OP_COMPLETED;
+ goto done;
+ }
+ } else {
+ /* Return ENXIO since unregistration is in progress */
+ mutex_lock(&ep->rma_info.rma_lock);
+ return -ENXIO;
+ }
+retry:
+ /* Wait for a SCIF_UNREGISTER_(N)ACK message */
+ err = wait_event_timeout(window->unregwq,
+ window->unreg_state != OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ if (!err) {
+ err = -ENODEV;
+ window->unreg_state = OP_COMPLETED;
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ }
+ if (err > 0)
+ err = 0;
+done:
+ mutex_lock(&ep->rma_info.rma_lock);
+ scif_put_window(window, 1);
+ break;
+ }
+ case OP_FAILED:
+ {
+ if (!scifdev_alive(ep)) {
+ err = -ENODEV;
+ window->unreg_state = OP_COMPLETED;
+ }
+ break;
+ }
+ case OP_COMPLETED:
+ break;
+ default:
+ err = -ENODEV;
+ }
+
+ if (window->unreg_state == OP_COMPLETED && window->ref_count)
+ scif_put_window(window, window->nr_pages);
+
+ if (!window->ref_count) {
+ atomic_inc(&ep->rma_info.tw_refcount);
+ list_del_init(&window->list);
+ scif_free_window_offset(ep, window, window->offset);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
+ scifdev_alive(ep)) {
+ scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan);
+ } else {
+ if (!__scif_dec_pinned_vm_lock(window->mm,
+ window->nr_pages, 1)) {
+ __scif_release_mm(window->mm);
+ window->mm = NULL;
+ }
+ }
+ scif_queue_for_cleanup(window, &scif_info.rma);
+ mutex_lock(&ep->rma_info.rma_lock);
+ }
+ return err;
+}
+
+/**
+ * scif_send_alloc_request:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request
+ */
+static int scif_send_alloc_request(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ struct scifmsg msg;
+ struct scif_allocmsg *alloc = &window->alloc_handle;
+
+ /* Set up the Alloc Handle */
+ alloc->state = OP_IN_PROGRESS;
+ init_waitqueue_head(&alloc->allocwq);
+
+ /* Send out an allocation request */
+ msg.uop = SCIF_ALLOC_REQ;
+ msg.payload[1] = window->nr_pages;
+ msg.payload[2] = (u64)&window->alloc_handle;
+ return _scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_prep_remote_window:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request, wait for an allocation response,
+ * and prepares the remote window by copying over the page lists
+ */
+static int scif_prep_remote_window(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ struct scifmsg msg;
+ struct scif_window *remote_window;
+ struct scif_allocmsg *alloc = &window->alloc_handle;
+ dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
+ int i = 0, j = 0;
+ int nr_contig_chunks, loop_nr_contig_chunks;
+ int remaining_nr_contig_chunks, nr_lookup;
+ int err, map_err;
+
+ map_err = scif_map_window(ep->remote_dev, window);
+ if (map_err)
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d map_err %d\n", __func__, __LINE__, map_err);
+ remaining_nr_contig_chunks = window->nr_contig_chunks;
+ nr_contig_chunks = window->nr_contig_chunks;
+retry:
+ /* Wait for a SCIF_ALLOC_GNT/REJ message */
+ err = wait_event_timeout(alloc->allocwq,
+ alloc->state != OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Synchronize with the thread waking up allocwq */
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+
+ if (!err)
+ err = -ENODEV;
+
+ if (err > 0)
+ err = 0;
+ else
+ return err;
+
+ /* Bail out. The remote end rejected this request */
+ if (alloc->state == OP_FAILED)
+ return -ENOMEM;
+
+ if (map_err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, map_err);
+ msg.uop = SCIF_FREE_VIRT;
+ msg.src = ep->port;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = window->alloc_handle.vaddr;
+ msg.payload[2] = (u64)window;
+ msg.payload[3] = SCIF_REGISTER;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED)
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ else
+ err = -ENOTCONN;
+ spin_unlock(&ep->lock);
+ return err;
+ }
+
+ remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
+ ep->remote_dev);
+
+ /* Compute the number of lookup entries. 21 == 2MB Shift */
+ nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
+ >> ilog2(SCIF_NR_ADDR_IN_PAGE);
+
+ dma_phys_lookup =
+ scif_ioremap(remote_window->dma_addr_lookup.offset,
+ nr_lookup *
+ sizeof(*remote_window->dma_addr_lookup.lookup),
+ ep->remote_dev);
+ num_pages_lookup =
+ scif_ioremap(remote_window->num_pages_lookup.offset,
+ nr_lookup *
+ sizeof(*remote_window->num_pages_lookup.lookup),
+ ep->remote_dev);
+
+ while (remaining_nr_contig_chunks) {
+ loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
+ (int)SCIF_NR_ADDR_IN_PAGE);
+ /* #1/2 - Copy physical addresses over to the remote side */
+
+ /* #2/2 - Copy DMA addresses (addresses that are fed into the
+ * DMA engine) We transfer bus addresses which are then
+ * converted into a MIC physical address on the remote
+ * side if it is a MIC, if the remote node is a mgmt node we
+ * transfer the MIC physical address
+ */
+ tmp = scif_ioremap(dma_phys_lookup[j],
+ loop_nr_contig_chunks *
+ sizeof(*window->dma_addr),
+ ep->remote_dev);
+ tmp1 = scif_ioremap(num_pages_lookup[j],
+ loop_nr_contig_chunks *
+ sizeof(*window->num_pages),
+ ep->remote_dev);
+ if (scif_is_mgmt_node()) {
+ memcpy_toio((void __force __iomem *)tmp,
+ &window->dma_addr[i], loop_nr_contig_chunks
+ * sizeof(*window->dma_addr));
+ memcpy_toio((void __force __iomem *)tmp1,
+ &window->num_pages[i], loop_nr_contig_chunks
+ * sizeof(*window->num_pages));
+ } else {
+ if (scifdev_is_p2p(ep->remote_dev)) {
+ /*
+ * add remote node's base address for this node
+ * to convert it into a MIC address
+ */
+ int m;
+ dma_addr_t dma_addr;
+
+ for (m = 0; m < loop_nr_contig_chunks; m++) {
+ dma_addr = window->dma_addr[i + m] +
+ ep->remote_dev->base_addr;
+ writeq(dma_addr,
+ (void __force __iomem *)&tmp[m]);
+ }
+ memcpy_toio((void __force __iomem *)tmp1,
+ &window->num_pages[i],
+ loop_nr_contig_chunks
+ * sizeof(*window->num_pages));
+ } else {
+ /* Mgmt node or loopback - transfer DMA
+ * addresses as is, this is the same as a
+ * MIC physical address (we use the dma_addr
+ * and not the phys_addr array since the
+ * phys_addr is only setup if there is a mmap()
+ * request from the mgmt node)
+ */
+ memcpy_toio((void __force __iomem *)tmp,
+ &window->dma_addr[i],
+ loop_nr_contig_chunks *
+ sizeof(*window->dma_addr));
+ memcpy_toio((void __force __iomem *)tmp1,
+ &window->num_pages[i],
+ loop_nr_contig_chunks *
+ sizeof(*window->num_pages));
+ }
+ }
+ remaining_nr_contig_chunks -= loop_nr_contig_chunks;
+ i += loop_nr_contig_chunks;
+ j++;
+ scif_iounmap(tmp, loop_nr_contig_chunks *
+ sizeof(*window->dma_addr), ep->remote_dev);
+ scif_iounmap(tmp1, loop_nr_contig_chunks *
+ sizeof(*window->num_pages), ep->remote_dev);
+ }
+
+ /* Prepare the remote window for the peer */
+ remote_window->peer_window = (u64)window;
+ remote_window->offset = window->offset;
+ remote_window->prot = window->prot;
+ remote_window->nr_contig_chunks = nr_contig_chunks;
+ remote_window->ep = ep->remote_ep;
+ scif_iounmap(num_pages_lookup,
+ nr_lookup *
+ sizeof(*remote_window->num_pages_lookup.lookup),
+ ep->remote_dev);
+ scif_iounmap(dma_phys_lookup,
+ nr_lookup *
+ sizeof(*remote_window->dma_addr_lookup.lookup),
+ ep->remote_dev);
+ scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
+ window->peer_window = alloc->vaddr;
+ return err;
+}
+
+/**
+ * scif_send_scif_register:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_REGISTER message if EP is connected and wait for a
+ * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
+ * message so that the peer can free its remote window allocated earlier.
+ */
+static int scif_send_scif_register(struct scif_endpt *ep,
+ struct scif_window *window)
+{
+ int err = 0;
+ struct scifmsg msg;
+
+ msg.src = ep->port;
+ msg.payload[0] = ep->remote_ep;
+ msg.payload[1] = window->alloc_handle.vaddr;
+ msg.payload[2] = (u64)window;
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED) {
+ msg.uop = SCIF_REGISTER;
+ window->reg_state = OP_IN_PROGRESS;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ spin_unlock(&ep->lock);
+ if (!err) {
+retry:
+ /* Wait for a SCIF_REGISTER_(N)ACK message */
+ err = wait_event_timeout(window->regwq,
+ window->reg_state !=
+ OP_IN_PROGRESS,
+ SCIF_NODE_ALIVE_TIMEOUT);
+ if (!err && scifdev_alive(ep))
+ goto retry;
+ err = !err ? -ENODEV : 0;
+ if (window->reg_state == OP_FAILED)
+ err = -ENOTCONN;
+ }
+ } else {
+ msg.uop = SCIF_FREE_VIRT;
+ msg.payload[3] = SCIF_REGISTER;
+ err = _scif_nodeqp_send(ep->remote_dev, &msg);
+ spin_unlock(&ep->lock);
+ if (!err)
+ err = -ENOTCONN;
+ }
+ return err;
+}
+
+/**
+ * scif_get_window_offset:
+ * @ep: end point descriptor
+ * @flags: flags
+ * @offset: offset hint
+ * @num_pages: number of pages
+ * @out_offset: computed offset returned by reference.
+ *
+ * Compute/Claim a new offset for this EP.
+ */
+int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
+ int num_pages, s64 *out_offset)
+{
+ s64 page_index;
+ struct iova *iova_ptr;
+ int err = 0;
+
+ if (flags & SCIF_MAP_FIXED) {
+ page_index = SCIF_IOVA_PFN(offset);
+ iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
+ page_index + num_pages - 1);
+ if (!iova_ptr)
+ err = -EADDRINUSE;
+ } else {
+ iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
+ SCIF_DMA_63BIT_PFN - 1, 0);
+ if (!iova_ptr)
+ err = -ENOMEM;
+ }
+ if (!err)
+ *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
+ return err;
+}
+
+/**
+ * scif_free_window_offset:
+ * @ep: end point descriptor
+ * @window: registration window
+ * @offset: Offset to be freed
+ *
+ * Free offset for this EP. The callee is supposed to grab
+ * the RMA mutex before calling this API.
+ */
+void scif_free_window_offset(struct scif_endpt *ep,
+ struct scif_window *window, s64 offset)
+{
+ if ((window && !window->offset_freed) || !window) {
+ free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
+ if (window)
+ window->offset_freed = true;
+ }
+}
+
+/**
+ * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
+ * @msg: Interrupt message
+ *
+ * Remote side is requesting a memory allocation.
+ */
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ int err;
+ struct scif_window *window = NULL;
+ int nr_pages = msg->payload[1];
+
+ window = scif_create_remote_window(scifdev, nr_pages);
+ if (!window) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ /* The peer's allocation request is granted */
+ msg->uop = SCIF_ALLOC_GNT;
+ msg->payload[0] = (u64)window;
+ msg->payload[1] = window->mapped_offset;
+ err = scif_nodeqp_send(scifdev, msg);
+ if (err)
+ scif_destroy_remote_window(window);
+ return;
+error:
+ /* The peer's allocation request is rejected */
+ dev_err(&scifdev->sdev->dev,
+ "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
+ __func__, __LINE__, err, window, nr_pages);
+ msg->uop = SCIF_ALLOC_REJ;
+ scif_nodeqp_send(scifdev, msg);
+}
+
+/**
+ * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
+ * @msg: Interrupt message
+ *
+ * Remote side responded to a memory allocation.
+ */
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
+ struct scif_window *window = container_of(handle, struct scif_window,
+ alloc_handle);
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ handle->vaddr = msg->payload[0];
+ handle->phys_addr = msg->payload[1];
+ if (msg->uop == SCIF_ALLOC_GNT)
+ handle->state = OP_COMPLETED;
+ else
+ handle->state = OP_FAILED;
+ wake_up(&handle->allocwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
+ * @msg: Interrupt message
+ *
+ * Free up memory kmalloc'd earlier.
+ */
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window = (struct scif_window *)msg->payload[1];
+
+ scif_destroy_remote_window(window);
+}
+
+static void
+scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
+{
+ int j;
+ struct scif_hw_dev *sdev = dev->sdev;
+ phys_addr_t apt_base = 0;
+
+ /*
+ * Add the aperture base if the DMA address is not card relative
+ * since the DMA addresses need to be an offset into the bar
+ */
+ if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
+ sdev->aper && !sdev->card_rel_da)
+ apt_base = sdev->aper->pa;
+ else
+ return;
+
+ for (j = 0; j < window->nr_contig_chunks; j++) {
+ if (window->num_pages[j])
+ window->dma_addr[j] += apt_base;
+ else
+ break;
+ }
+}
+
+/**
+ * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
+ * @msg: Interrupt message
+ *
+ * Update remote window list with a new registered window.
+ */
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[1];
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ spin_lock(&ep->lock);
+ if (ep->state == SCIFEP_CONNECTED) {
+ msg->uop = SCIF_REGISTER_ACK;
+ scif_nodeqp_send(ep->remote_dev, msg);
+ scif_fixup_aper_base(ep->remote_dev, window);
+ /* No further failures expected. Insert new window */
+ scif_insert_window(window, &ep->rma_info.remote_reg_list);
+ } else {
+ msg->uop = SCIF_REGISTER_NACK;
+ scif_nodeqp_send(ep->remote_dev, msg);
+ }
+ spin_unlock(&ep->lock);
+ mutex_unlock(&ep->rma_info.rma_lock);
+ /* free up any lookup resources now that page lists are transferred */
+ scif_destroy_remote_lookup(ep->remote_dev, window);
+ /*
+ * We could not insert the window but we need to
+ * destroy the window.
+ */
+ if (msg->uop == SCIF_REGISTER_NACK)
+ scif_destroy_remote_window(window);
+}
+
+/**
+ * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
+ * @msg: Interrupt message
+ *
+ * Remove window from remote registration list;
+ */
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_rma_req req;
+ struct scif_window *window = NULL;
+ struct scif_window *recv_window =
+ (struct scif_window *)msg->payload[0];
+ struct scif_endpt *ep;
+ int del_window = 0;
+
+ ep = (struct scif_endpt *)recv_window->ep;
+ req.out_window = &window;
+ req.offset = recv_window->offset;
+ req.prot = 0;
+ req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+ req.type = SCIF_WINDOW_FULL;
+ req.head = &ep->rma_info.remote_reg_list;
+ msg->payload[0] = ep->remote_ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ if (scif_query_window(&req)) {
+ dev_err(&scifdev->sdev->dev,
+ "%s %d -ENXIO\n", __func__, __LINE__);
+ msg->uop = SCIF_UNREGISTER_ACK;
+ goto error;
+ }
+ if (window) {
+ if (window->ref_count)
+ scif_put_window(window, window->nr_pages);
+ else
+ dev_err(&scifdev->sdev->dev,
+ "%s %d ref count should be +ve\n",
+ __func__, __LINE__);
+ window->unreg_state = OP_COMPLETED;
+ if (!window->ref_count) {
+ msg->uop = SCIF_UNREGISTER_ACK;
+ atomic_inc(&ep->rma_info.tw_refcount);
+ ep->rma_info.async_list_del = 1;
+ list_del_init(&window->list);
+ del_window = 1;
+ } else {
+ /* NACK! There are valid references to this window */
+ msg->uop = SCIF_UNREGISTER_NACK;
+ }
+ } else {
+ /* The window did not make its way to the list at all. ACK */
+ msg->uop = SCIF_UNREGISTER_ACK;
+ scif_destroy_remote_window(recv_window);
+ }
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (del_window)
+ scif_drain_dma_intr(ep->remote_dev->sdev,
+ ep->rma_info.dma_chan);
+ scif_nodeqp_send(ep->remote_dev, msg);
+ if (del_window)
+ scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/**
+ * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to complete registration.
+ */
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[2];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->reg_state = OP_COMPLETED;
+ wake_up(&window->regwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to inform it that registration
+ * cannot be completed.
+ */
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[2];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->reg_state = OP_FAILED;
+ wake_up(&window->regwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to complete unregistration.
+ */
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[1];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->unreg_state = OP_COMPLETED;
+ wake_up(&window->unregwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/**
+ * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
+ * @msg: Interrupt message
+ *
+ * Wake up the window waiting to inform it that unregistration
+ * cannot be completed immediately.
+ */
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+ struct scif_window *window =
+ (struct scif_window *)msg->payload[1];
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ window->unreg_state = OP_FAILED;
+ wake_up(&window->unregwq);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+ int map_flags, scif_pinned_pages_t *pages)
+{
+ struct scif_pinned_pages *pinned_pages;
+ int nr_pages, err = 0, i;
+ bool vmalloc_addr = false;
+ bool try_upgrade = false;
+ int prot = *out_prot;
+ int ulimit = 0;
+ struct mm_struct *mm = NULL;
+
+ /* Unsupported flags */
+ if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
+ return -EINVAL;
+ ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
+
+ /* Unsupported protection requested */
+ if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+ return -EINVAL;
+
+ /* addr/len must be page aligned. len should be non zero */
+ if (!len ||
+ (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+ (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+ return -EINVAL;
+
+ might_sleep();
+
+ nr_pages = len >> PAGE_SHIFT;
+
+ /* Allocate a set of pinned pages */
+ pinned_pages = scif_create_pinned_pages(nr_pages, prot);
+ if (!pinned_pages)
+ return -ENOMEM;
+
+ if (map_flags & SCIF_MAP_KERNEL) {
+ if (is_vmalloc_addr(addr))
+ vmalloc_addr = true;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (vmalloc_addr)
+ pinned_pages->pages[i] =
+ vmalloc_to_page(addr + (i * PAGE_SIZE));
+ else
+ pinned_pages->pages[i] =
+ virt_to_page(addr + (i * PAGE_SIZE));
+ }
+ pinned_pages->nr_pages = nr_pages;
+ pinned_pages->map_flags = SCIF_MAP_KERNEL;
+ } else {
+ /*
+ * SCIF supports registration caching. If a registration has
+ * been requested with read only permissions, then we try
+ * to pin the pages with RW permissions so that a subsequent
+ * transfer with RW permission can hit the cache instead of
+ * invalidating it. If the upgrade fails with RW then we
+ * revert back to R permission and retry
+ */
+ if (prot == SCIF_PROT_READ)
+ try_upgrade = true;
+ prot |= SCIF_PROT_WRITE;
+retry:
+ mm = current->mm;
+ down_write(&mm->mmap_sem);
+ if (ulimit) {
+ err = __scif_check_inc_pinned_vm(mm, nr_pages);
+ if (err) {
+ up_write(&mm->mmap_sem);
+ pinned_pages->nr_pages = 0;
+ goto error_unmap;
+ }
+ }
+
+ pinned_pages->nr_pages = get_user_pages(
+ current,
+ mm,
+ (u64)addr,
+ nr_pages,
+ !!(prot & SCIF_PROT_WRITE),
+ 0,
+ pinned_pages->pages,
+ NULL);
+ up_write(&mm->mmap_sem);
+ if (nr_pages != pinned_pages->nr_pages) {
+ if (try_upgrade) {
+ if (ulimit)
+ __scif_dec_pinned_vm_lock(mm,
+ nr_pages, 0);
+ /* Roll back any pinned pages */
+ for (i = 0; i < pinned_pages->nr_pages; i++) {
+ if (pinned_pages->pages[i])
+ put_page(
+ pinned_pages->pages[i]);
+ }
+ prot &= ~SCIF_PROT_WRITE;
+ try_upgrade = false;
+ goto retry;
+ }
+ }
+ pinned_pages->map_flags = 0;
+ }
+
+ if (pinned_pages->nr_pages < nr_pages) {
+ err = -EFAULT;
+ pinned_pages->nr_pages = nr_pages;
+ goto dec_pinned;
+ }
+
+ *out_prot = prot;
+ atomic_set(&pinned_pages->ref_count, 1);
+ *pages = pinned_pages;
+ return err;
+dec_pinned:
+ if (ulimit)
+ __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
+ /* Something went wrong! Rollback */
+error_unmap:
+ pinned_pages->nr_pages = nr_pages;
+ scif_destroy_pinned_pages(pinned_pages);
+ *pages = NULL;
+ dev_dbg(scif_info.mdev.this_device,
+ "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
+ return err;
+}
+
+int scif_pin_pages(void *addr, size_t len, int prot,
+ int map_flags, scif_pinned_pages_t *pages)
+{
+ return __scif_pin_pages(addr, len, &prot, map_flags, pages);
+}
+EXPORT_SYMBOL_GPL(scif_pin_pages);
+
+int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
+{
+ int err = 0, ret;
+
+ if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
+ return -EINVAL;
+
+ ret = atomic_sub_return(1, &pinned_pages->ref_count);
+ if (ret < 0) {
+ dev_err(scif_info.mdev.this_device,
+ "%s %d scif_unpin_pages called without pinning? rc %d\n",
+ __func__, __LINE__, ret);
+ return -EINVAL;
+ }
+ /*
+ * Destroy the window if the ref count for this set of pinned
+ * pages has dropped to zero. If it is positive then there is
+ * a valid registered window which is backed by these pages and
+ * it will be destroyed once all such windows are unregistered.
+ */
+ if (!ret)
+ err = scif_destroy_pinned_pages(pinned_pages);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_unpin_pages);
+
+static inline void
+scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
+{
+ mutex_lock(&ep->rma_info.rma_lock);
+ scif_insert_window(window, &ep->rma_info.reg_list);
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+off_t scif_register_pinned_pages(scif_epd_t epd,
+ scif_pinned_pages_t pinned_pages,
+ off_t offset, int map_flags)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ s64 computed_offset;
+ struct scif_window *window;
+ int err;
+ size_t len;
+ struct device *spdev;
+
+ /* Unsupported flags */
+ if (map_flags & ~SCIF_MAP_FIXED)
+ return -EINVAL;
+
+ len = pinned_pages->nr_pages << PAGE_SHIFT;
+
+ /*
+ * Offset is not page aligned/negative or offset+len
+ * wraps around with SCIF_MAP_FIXED.
+ */
+ if ((map_flags & SCIF_MAP_FIXED) &&
+ ((ALIGN(offset, PAGE_SIZE) != offset) ||
+ (offset < 0) ||
+ (offset + (off_t)len < offset)))
+ return -EINVAL;
+
+ might_sleep();
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+ /*
+ * It is an error to pass pinned_pages to scif_register_pinned_pages()
+ * after calling scif_unpin_pages().
+ */
+ if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
+ return -EINVAL;
+
+ /* Compute the offset for this registration */
+ err = scif_get_window_offset(ep, map_flags, offset,
+ len, &computed_offset);
+ if (err) {
+ atomic_sub(1, &pinned_pages->ref_count);
+ return err;
+ }
+
+ /* Allocate and prepare self registration window */
+ window = scif_create_window(ep, pinned_pages->nr_pages,
+ computed_offset, false);
+ if (!window) {
+ atomic_sub(1, &pinned_pages->ref_count);
+ scif_free_window_offset(ep, NULL, computed_offset);
+ return -ENOMEM;
+ }
+
+ window->pinned_pages = pinned_pages;
+ window->nr_pages = pinned_pages->nr_pages;
+ window->prot = pinned_pages->prot;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ scif_destroy_window(ep, window);
+ return err;
+ }
+ err = scif_send_alloc_request(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ /* Prepare the remote registration window */
+ err = scif_prep_remote_window(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ /* Tell the peer about the new window */
+ err = scif_send_scif_register(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ scif_put_peer_dev(spdev);
+ /* No further failures expected. Insert new window */
+ scif_insert_local_window(window, ep);
+ return computed_offset;
+error_unmap:
+ scif_destroy_window(ep, window);
+ scif_put_peer_dev(spdev);
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
+
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+ int prot, int map_flags)
+{
+ scif_pinned_pages_t pinned_pages;
+ off_t err;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ s64 computed_offset;
+ struct scif_window *window;
+ struct mm_struct *mm = NULL;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
+ epd, addr, len, offset, prot, map_flags);
+ /* Unsupported flags */
+ if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
+ return -EINVAL;
+
+ /*
+ * Offset is not page aligned/negative or offset+len
+ * wraps around with SCIF_MAP_FIXED.
+ */
+ if ((map_flags & SCIF_MAP_FIXED) &&
+ ((ALIGN(offset, PAGE_SIZE) != offset) ||
+ (offset < 0) ||
+ (offset + (off_t)len < offset)))
+ return -EINVAL;
+
+ /* Unsupported protection requested */
+ if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+ return -EINVAL;
+
+ /* addr/len must be page aligned. len should be non zero */
+ if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+ (ALIGN(len, PAGE_SIZE) != len))
+ return -EINVAL;
+
+ might_sleep();
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ /* Compute the offset for this registration */
+ err = scif_get_window_offset(ep, map_flags, offset,
+ len >> PAGE_SHIFT, &computed_offset);
+ if (err)
+ return err;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ scif_free_window_offset(ep, NULL, computed_offset);
+ return err;
+ }
+ /* Allocate and prepare self registration window */
+ window = scif_create_window(ep, len >> PAGE_SHIFT,
+ computed_offset, false);
+ if (!window) {
+ scif_free_window_offset(ep, NULL, computed_offset);
+ scif_put_peer_dev(spdev);
+ return -ENOMEM;
+ }
+
+ window->nr_pages = len >> PAGE_SHIFT;
+
+ err = scif_send_alloc_request(ep, window);
+ if (err) {
+ scif_destroy_incomplete_window(ep, window);
+ scif_put_peer_dev(spdev);
+ return err;
+ }
+
+ if (!(map_flags & SCIF_MAP_KERNEL)) {
+ mm = __scif_acquire_mm();
+ map_flags |= SCIF_MAP_ULIMIT;
+ }
+ /* Pin down the pages */
+ err = __scif_pin_pages(addr, len, &prot,
+ map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
+ &pinned_pages);
+ if (err) {
+ scif_destroy_incomplete_window(ep, window);
+ __scif_release_mm(mm);
+ goto error;
+ }
+
+ window->pinned_pages = pinned_pages;
+ window->prot = pinned_pages->prot;
+ window->mm = mm;
+
+ /* Prepare the remote registration window */
+ err = scif_prep_remote_window(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %ld\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ /* Tell the peer about the new window */
+ err = scif_send_scif_register(ep, window);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %ld\n", __func__, __LINE__, err);
+ goto error_unmap;
+ }
+
+ scif_put_peer_dev(spdev);
+ /* No further failures expected. Insert new window */
+ scif_insert_local_window(window, ep);
+ dev_dbg(&ep->remote_dev->sdev->dev,
+ "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
+ epd, addr, len, computed_offset);
+ return computed_offset;
+error_unmap:
+ scif_destroy_window(ep, window);
+error:
+ scif_put_peer_dev(spdev);
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %ld\n", __func__, __LINE__, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_register);
+
+int
+scif_unregister(scif_epd_t epd, off_t offset, size_t len)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct scif_window *window = NULL;
+ struct scif_rma_req req;
+ int nr_pages, err;
+ struct device *spdev;
+
+ dev_dbg(scif_info.mdev.this_device,
+ "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
+ ep, offset, len);
+ /* len must be page aligned. len should be non zero */
+ if (!len ||
+ (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+ return -EINVAL;
+
+ /* Offset is not page aligned or offset+len wraps around */
+ if ((ALIGN(offset, PAGE_SIZE) != offset) ||
+ (offset + (off_t)len < offset))
+ return -EINVAL;
+
+ err = scif_verify_epd(ep);
+ if (err)
+ return err;
+
+ might_sleep();
+ nr_pages = len >> PAGE_SHIFT;
+
+ req.out_window = &window;
+ req.offset = offset;
+ req.prot = 0;
+ req.nr_bytes = len;
+ req.type = SCIF_WINDOW_FULL;
+ req.head = &ep->rma_info.reg_list;
+
+ spdev = scif_get_peer_dev(ep->remote_dev);
+ if (IS_ERR(spdev)) {
+ err = PTR_ERR(spdev);
+ return err;
+ }
+ mutex_lock(&ep->rma_info.rma_lock);
+ /* Does a valid window exist? */
+ err = scif_query_window(&req);
+ if (err) {
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+ goto error;
+ }
+ /* Unregister all the windows in this range */
+ err = scif_rma_list_unregister(window, offset, nr_pages);
+ if (err)
+ dev_err(&ep->remote_dev->sdev->dev,
+ "%s %d err %d\n", __func__, __LINE__, err);
+error:
+ mutex_unlock(&ep->rma_info.rma_lock);
+ scif_put_peer_dev(spdev);
+ return err;
+}
+EXPORT_SYMBOL_GPL(scif_unregister);
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
new file mode 100644
index 000000000000..fa6722279196
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -0,0 +1,464 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_H
+#define SCIF_RMA_H
+
+#include <linux/dma_remapping.h>
+#include <linux/mmu_notifier.h>
+
+#include "../bus/scif_bus.h"
+
+/* If this bit is set then the mark is a remote fence mark */
+#define SCIF_REMOTE_FENCE_BIT 31
+/* Magic value used to indicate a remote fence request */
+#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
+
+#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
+#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
+ (L1_CACHE_BYTES << 1))
+
+#define SCIF_IOVA_START_PFN (1)
+#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
+#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
+
+/*
+ * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
+ *
+ * @reg_list: List of registration windows for self
+ * @remote_reg_list: List of registration windows for peer
+ * @iovad: Offset generator
+ * @rma_lock: Synchronizes access to self/remote list and also protects the
+ * window from being destroyed while RMAs are in progress.
+ * @tc_lock: Synchronizes access to temporary cached windows list
+ * for SCIF Registration Caching.
+ * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
+ * @tw_refcount: Keeps track of number of outstanding temporary registered
+ * windows created by scif_vreadfrom/scif_vwriteto which have
+ * not been destroyed.
+ * @tcw_refcount: Same as tw_refcount but for temporary cached windows
+ * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
+ * @mmn_list: MMU notifier so that we can destroy the windows when required
+ * @fence_refcount: Keeps track of number of outstanding remote fence
+ * requests which have been received by the peer.
+ * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
+ * @async_list_del: Detect asynchronous list entry deletion
+ * @vma_list: List of vmas with remote memory mappings
+ * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
+*/
+struct scif_endpt_rma_info {
+ struct list_head reg_list;
+ struct list_head remote_reg_list;
+ struct iova_domain iovad;
+ struct mutex rma_lock;
+ spinlock_t tc_lock;
+ struct mutex mmn_lock;
+ atomic_t tw_refcount;
+ atomic_t tcw_refcount;
+ atomic_t tcw_total_pages;
+ struct list_head mmn_list;
+ atomic_t fence_refcount;
+ struct dma_chan *dma_chan;
+ int async_list_del;
+ struct list_head vma_list;
+ wait_queue_head_t markwq;
+};
+
+/*
+ * struct scif_fence_info - used for tracking fence requests
+ *
+ * @state: State of this transfer
+ * @wq: Fences wait on this queue
+ * @dma_mark: Used for storing the DMA mark
+ */
+struct scif_fence_info {
+ enum scif_msg_state state;
+ struct completion comp;
+ int dma_mark;
+};
+
+/*
+ * struct scif_remote_fence_info - used for tracking remote fence requests
+ *
+ * @msg: List of SCIF node QP fence messages
+ * @list: Link to list of remote fence requests
+ */
+struct scif_remote_fence_info {
+ struct scifmsg msg;
+ struct list_head list;
+};
+
+/*
+ * Specifies whether an RMA operation can span across partial windows, a single
+ * window or multiple contiguous windows. Mmaps can span across partial windows.
+ * Unregistration can span across complete windows. scif_get_pages() can span a
+ * single window. A window can also be of type self or peer.
+ */
+enum scif_window_type {
+ SCIF_WINDOW_PARTIAL,
+ SCIF_WINDOW_SINGLE,
+ SCIF_WINDOW_FULL,
+ SCIF_WINDOW_SELF,
+ SCIF_WINDOW_PEER
+};
+
+/* The number of physical addresses that can be stored in a PAGE. */
+#define SCIF_NR_ADDR_IN_PAGE (0x1000 >> 3)
+
+/*
+ * struct scif_rma_lookup - RMA lookup data structure for page list transfers
+ *
+ * Store an array of lookup offsets. Each offset in this array maps
+ * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
+ * offsets in a 4K page will correspond to 1GB of registered address space.
+
+ * @lookup: Array of offsets
+ * @offset: DMA offset of lookup array
+ */
+struct scif_rma_lookup {
+ dma_addr_t *lookup;
+ dma_addr_t offset;
+};
+
+/*
+ * struct scif_pinned_pages - A set of pinned pages obtained with
+ * scif_pin_pages() which could be part of multiple registered
+ * windows across different end points.
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @prot: read/write protections
+ * @map_flags: Flags specified during the pin operation
+ * @ref_count: Reference count bumped in terms of number of pages
+ * @magic: A magic value
+ * @pages: Array of pointers to struct pages populated with get_user_pages(..)
+ */
+struct scif_pinned_pages {
+ s64 nr_pages;
+ int prot;
+ int map_flags;
+ atomic_t ref_count;
+ u64 magic;
+ struct page **pages;
+};
+
+/*
+ * struct scif_status - Stores DMA status update information
+ *
+ * @src_dma_addr: Source buffer DMA address
+ * @val: src location for value to be written to the destination
+ * @ep: SCIF endpoint
+ */
+struct scif_status {
+ dma_addr_t src_dma_addr;
+ u64 val;
+ struct scif_endpt *ep;
+};
+
+/*
+ * struct scif_window - Registration Window for Self and Remote
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @nr_contig_chunks: Number of contiguous physical chunks
+ * @prot: read/write protections
+ * @ref_count: reference count in terms of number of pages
+ * @magic: Cookie to detect corruption
+ * @offset: registered offset
+ * @va_for_temp: va address that this window represents
+ * @dma_mark: Used to determine if all DMAs against the window are done
+ * @ep: Pointer to EP. Useful for passing EP around with messages to
+ avoid expensive list traversals.
+ * @list: link to list of windows for the endpoint
+ * @type: self or peer window
+ * @peer_window: Pointer to peer window. Useful for sending messages to peer
+ * without requiring an extra list traversal
+ * @unreg_state: unregistration state
+ * @offset_freed: True if the offset has been freed
+ * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @st: scatter gather table for DMA mappings with IOMMU enabled
+ * @pinned_pages: The set of pinned_pages backing this window
+ * @alloc_handle: Handle for sending ALLOC_REQ
+ * @regwq: Wait Queue for an registration (N)ACK
+ * @reg_state: Registration state
+ * @unregwq: Wait Queue for an unregistration (N)ACK
+ * @dma_addr_lookup: Lookup for physical addresses used for DMA
+ * @nr_lookup: Number of entries in lookup
+ * @mapped_offset: Offset used to map the window by the peer
+ * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
+ * @num_pages: Array specifying number of pages for each physical address
+ */
+struct scif_window {
+ s64 nr_pages;
+ int nr_contig_chunks;
+ int prot;
+ int ref_count;
+ u64 magic;
+ s64 offset;
+ unsigned long va_for_temp;
+ int dma_mark;
+ u64 ep;
+ struct list_head list;
+ enum scif_window_type type;
+ u64 peer_window;
+ enum scif_msg_state unreg_state;
+ bool offset_freed;
+ bool temp;
+ struct mm_struct *mm;
+ struct sg_table *st;
+ union {
+ struct {
+ struct scif_pinned_pages *pinned_pages;
+ struct scif_allocmsg alloc_handle;
+ wait_queue_head_t regwq;
+ enum scif_msg_state reg_state;
+ wait_queue_head_t unregwq;
+ };
+ struct {
+ struct scif_rma_lookup dma_addr_lookup;
+ struct scif_rma_lookup num_pages_lookup;
+ int nr_lookup;
+ dma_addr_t mapped_offset;
+ };
+ };
+ dma_addr_t *dma_addr;
+ u64 *num_pages;
+} __packed;
+
+/*
+ * scif_mmu_notif - SCIF mmu notifier information
+ *
+ * @mmu_notifier ep_mmu_notifier: MMU notifier operations
+ * @tc_reg_list: List of temp registration windows for self
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @ep: SCIF endpoint
+ * @list: link to list of MMU notifier information
+ */
+struct scif_mmu_notif {
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_notifier ep_mmu_notifier;
+#endif
+ struct list_head tc_reg_list;
+ struct mm_struct *mm;
+ struct scif_endpt *ep;
+ struct list_head list;
+};
+
+enum scif_rma_dir {
+ SCIF_LOCAL_TO_REMOTE,
+ SCIF_REMOTE_TO_LOCAL
+};
+
+extern struct kmem_cache *unaligned_cache;
+/* Initialize RMA for this EP */
+void scif_rma_ep_init(struct scif_endpt *ep);
+/* Check if epd can be uninitialized */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep);
+/* Obtain a new offset. Callee must grab RMA lock */
+int scif_get_window_offset(struct scif_endpt *ep, int flags,
+ s64 offset, int nr_pages, s64 *out_offset);
+/* Free offset. Callee must grab RMA lock */
+void scif_free_window_offset(struct scif_endpt *ep,
+ struct scif_window *window, s64 offset);
+/* Create self registration window */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+ s64 offset, bool temp);
+/* Destroy self registration window.*/
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
+/* Map pages of self window to Aperture/PCI */
+int scif_map_window(struct scif_dev *remote_dev,
+ struct scif_window *window);
+/* Unregister a self window */
+int scif_unregister_window(struct scif_window *window);
+/* Destroy remote registration window */
+void
+scif_destroy_remote_window(struct scif_window *window);
+/* remove valid remote memory mappings from process address space */
+void scif_zap_mmaps(int node);
+/* Query if any applications have remote memory mappings */
+bool scif_rma_do_apps_have_mmaps(int node);
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node);
+/* Reserve a DMA channel for a particular endpoint */
+int scif_reserve_dma_chan(struct scif_endpt *ep);
+/* Setup a DMA mark for an endpoint */
+int _scif_fence_mark(scif_epd_t epd, int *mark);
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+ enum scif_window_type type);
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_mmu_notif_handler(struct work_struct *work);
+void scif_rma_handle_remote_fences(void);
+void scif_rma_destroy_windows(void);
+void scif_rma_destroy_tcw_invalid(void);
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
+
+struct scif_window_iter {
+ s64 offset;
+ int index;
+};
+
+static inline void
+scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
+{
+ iter->offset = window->offset;
+ iter->index = 0;
+}
+
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+ size_t *nr_bytes,
+ struct scif_window_iter *iter);
+static inline
+dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
+{
+ return scif_off_to_dma_addr(window, off, NULL, NULL);
+}
+
+static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
+{
+ src_offset = src_offset & (L1_CACHE_BYTES - 1);
+ dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
+ return !(src_offset == dst_offset);
+}
+
+/*
+ * scif_zalloc:
+ * @size: Size of the allocation request.
+ *
+ * Helper API which attempts to allocate zeroed pages via
+ * __get_free_pages(..) first and then falls back on
+ * vzalloc(..) if that fails.
+ */
+static inline void *scif_zalloc(size_t size)
+{
+ void *ret = NULL;
+ size_t align = ALIGN(size, PAGE_SIZE);
+
+ if (align && get_order(align) < MAX_ORDER)
+ ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(align));
+ return ret ? ret : vzalloc(align);
+}
+
+/*
+ * scif_free:
+ * @addr: Address to be freed.
+ * @size: Size of the allocation.
+ * Helper API which frees memory allocated via scif_zalloc().
+ */
+static inline void scif_free(void *addr, size_t size)
+{
+ size_t align = ALIGN(size, PAGE_SIZE);
+
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ free_pages((unsigned long)addr, get_order(align));
+}
+
+static inline void scif_get_window(struct scif_window *window, int nr_pages)
+{
+ window->ref_count += nr_pages;
+}
+
+static inline void scif_put_window(struct scif_window *window, int nr_pages)
+{
+ window->ref_count -= nr_pages;
+}
+
+static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
+{
+ window->ref_count = nr_pages;
+}
+
+static inline void
+scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
+{
+ spin_lock(&scif_info.rmalock);
+ list_add_tail(&window->list, list);
+ spin_unlock(&scif_info.rmalock);
+ schedule_work(&scif_info.misc_work);
+}
+
+static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
+{
+ list_del_init(&window->list);
+ scif_queue_for_cleanup(window, &scif_info.rma_tc);
+}
+
+static inline bool scif_is_iommu_enabled(void)
+{
+#ifdef CONFIG_INTEL_IOMMU
+ return intel_iommu_enabled;
+#else
+ return false;
+#endif
+}
+#endif /* SCIF_RMA_H */
diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c
new file mode 100644
index 000000000000..e1ef8daedd5a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.c
@@ -0,0 +1,291 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include <linux/mmu_notifier.h>
+#include <linux/highmem.h>
+
+/*
+ * scif_insert_tcw:
+ *
+ * Insert a temp window to the temp registration list sorted by va_for_temp.
+ * RMA lock must be held.
+ */
+void scif_insert_tcw(struct scif_window *window, struct list_head *head)
+{
+ struct scif_window *curr = NULL;
+ struct scif_window *prev = list_entry(head, struct scif_window, list);
+ struct list_head *item;
+
+ INIT_LIST_HEAD(&window->list);
+ /* Compare with tail and if the entry is new tail add it to the end */
+ if (!list_empty(head)) {
+ curr = list_entry(head->prev, struct scif_window, list);
+ if (curr->va_for_temp < window->va_for_temp) {
+ list_add_tail(&window->list, head);
+ return;
+ }
+ }
+ list_for_each(item, head) {
+ curr = list_entry(item, struct scif_window, list);
+ if (curr->va_for_temp > window->va_for_temp)
+ break;
+ prev = curr;
+ }
+ list_add(&window->list, &prev->list);
+}
+
+/*
+ * scif_insert_window:
+ *
+ * Insert a window to the self registration list sorted by offset.
+ * RMA lock must be held.
+ */
+void scif_insert_window(struct scif_window *window, struct list_head *head)
+{
+ struct scif_window *curr = NULL, *prev = NULL;
+ struct list_head *item;
+
+ INIT_LIST_HEAD(&window->list);
+ list_for_each(item, head) {
+ curr = list_entry(item, struct scif_window, list);
+ if (curr->offset > window->offset)
+ break;
+ prev = curr;
+ }
+ if (!prev)
+ list_add(&window->list, head);
+ else
+ list_add(&window->list, &prev->list);
+ scif_set_window_ref(window, window->nr_pages);
+}
+
+/*
+ * scif_query_tcw:
+ *
+ * Query the temp cached registration list of ep for an overlapping window
+ * in case of permission mismatch, destroy the previous window. if permissions
+ * match and overlap is partial, destroy the window but return the new range
+ * RMA lock must be held.
+ */
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
+{
+ struct list_head *item, *temp, *head = req->head;
+ struct scif_window *window;
+ u64 start_va_window, start_va_req = req->va_for_temp;
+ u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
+
+ if (!req->nr_bytes)
+ return -EINVAL;
+ /*
+ * Avoid traversing the entire list to find out that there
+ * is no entry that matches
+ */
+ if (!list_empty(head)) {
+ window = list_last_entry(head, struct scif_window, list);
+ end_va_window = window->va_for_temp +
+ (window->nr_pages << PAGE_SHIFT);
+ if (start_va_req > end_va_window)
+ return -ENXIO;
+ }
+ list_for_each_safe(item, temp, head) {
+ window = list_entry(item, struct scif_window, list);
+ start_va_window = window->va_for_temp;
+ end_va_window = window->va_for_temp +
+ (window->nr_pages << PAGE_SHIFT);
+ if (start_va_req < start_va_window &&
+ end_va_req < start_va_window)
+ break;
+ if (start_va_req >= end_va_window)
+ continue;
+ if ((window->prot & req->prot) == req->prot) {
+ if (start_va_req >= start_va_window &&
+ end_va_req <= end_va_window) {
+ *req->out_window = window;
+ return 0;
+ }
+ /* expand window */
+ if (start_va_req < start_va_window) {
+ req->nr_bytes +=
+ start_va_window - start_va_req;
+ req->va_for_temp = start_va_window;
+ }
+ if (end_va_req >= end_va_window)
+ req->nr_bytes += end_va_window - end_va_req;
+ }
+ /* Destroy the old window to create a new one */
+ __scif_rma_destroy_tcw_helper(window);
+ break;
+ }
+ return -ENXIO;
+}
+
+/*
+ * scif_query_window:
+ *
+ * Query the registration list and check if a valid contiguous
+ * range of windows exist.
+ * RMA lock must be held.
+ */
+int scif_query_window(struct scif_rma_req *req)
+{
+ struct list_head *item;
+ struct scif_window *window;
+ s64 end_offset, offset = req->offset;
+ u64 tmp_min, nr_bytes_left = req->nr_bytes;
+
+ if (!req->nr_bytes)
+ return -EINVAL;
+
+ list_for_each(item, req->head) {
+ window = list_entry(item, struct scif_window, list);
+ end_offset = window->offset +
+ (window->nr_pages << PAGE_SHIFT);
+ if (offset < window->offset)
+ /* Offset not found! */
+ return -ENXIO;
+ if (offset >= end_offset)
+ continue;
+ /* Check read/write protections. */
+ if ((window->prot & req->prot) != req->prot)
+ return -EPERM;
+ if (nr_bytes_left == req->nr_bytes)
+ /* Store the first window */
+ *req->out_window = window;
+ tmp_min = min((u64)end_offset - offset, nr_bytes_left);
+ nr_bytes_left -= tmp_min;
+ offset += tmp_min;
+ /*
+ * Range requested encompasses
+ * multiple windows contiguously.
+ */
+ if (!nr_bytes_left) {
+ /* Done for partial window */
+ if (req->type == SCIF_WINDOW_PARTIAL ||
+ req->type == SCIF_WINDOW_SINGLE)
+ return 0;
+ /* Extra logic for full windows */
+ if (offset == end_offset)
+ /* Spanning multiple whole windows */
+ return 0;
+ /* Not spanning multiple whole windows */
+ return -ENXIO;
+ }
+ if (req->type == SCIF_WINDOW_SINGLE)
+ break;
+ }
+ dev_err(scif_info.mdev.this_device,
+ "%s %d ENXIO\n", __func__, __LINE__);
+ return -ENXIO;
+}
+
+/*
+ * scif_rma_list_unregister:
+ *
+ * Traverse the self registration list starting from window:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_rma_list_unregister(struct scif_window *window,
+ s64 offset, int nr_pages)
+{
+ struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+ struct list_head *head = &ep->rma_info.reg_list;
+ s64 end_offset;
+ int err = 0;
+ int loop_nr_pages;
+ struct scif_window *_window;
+
+ list_for_each_entry_safe_from(window, _window, head, list) {
+ end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
+ loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
+ nr_pages);
+ err = scif_unregister_window(window);
+ if (err)
+ return err;
+ nr_pages -= loop_nr_pages;
+ offset += (loop_nr_pages << PAGE_SHIFT);
+ if (!nr_pages)
+ break;
+ }
+ return 0;
+}
+
+/*
+ * scif_unmap_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Delete any DMA mappings created
+ */
+void scif_unmap_all_windows(scif_epd_t epd)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct list_head *head = &ep->rma_info.reg_list;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+ list_for_each_safe(item, tmp, head) {
+ window = list_entry(item, struct scif_window, list);
+ scif_unmap_window(ep->remote_dev, window);
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/*
+ * scif_unregister_all_window:
+ *
+ * Traverse all the windows in the self registration list and:
+ * 1) Call scif_unregister_window(..)
+ * RMA lock must be held.
+ */
+int scif_unregister_all_windows(scif_epd_t epd)
+{
+ struct list_head *item, *tmp;
+ struct scif_window *window;
+ struct scif_endpt *ep = (struct scif_endpt *)epd;
+ struct list_head *head = &ep->rma_info.reg_list;
+ int err = 0;
+
+ mutex_lock(&ep->rma_info.rma_lock);
+retry:
+ item = NULL;
+ tmp = NULL;
+ list_for_each_safe(item, tmp, head) {
+ window = list_entry(item, struct scif_window, list);
+ ep->rma_info.async_list_del = 0;
+ err = scif_unregister_window(window);
+ if (err)
+ dev_err(scif_info.mdev.this_device,
+ "%s %d err %d\n",
+ __func__, __LINE__, err);
+ /*
+ * Need to restart list traversal if there has been
+ * an asynchronous list entry deletion.
+ */
+ if (ACCESS_ONCE(ep->rma_info.async_list_del))
+ goto retry;
+ }
+ mutex_unlock(&ep->rma_info.rma_lock);
+ if (!list_empty(&ep->rma_info.mmn_list)) {
+ spin_lock(&scif_info.rmalock);
+ list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
+ spin_unlock(&scif_info.rmalock);
+ schedule_work(&scif_info.mmu_notif_work);
+ }
+ return err;
+}
diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h
new file mode 100644
index 000000000000..7d58d1d551b0
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.h
@@ -0,0 +1,57 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_LIST_H
+#define SCIF_RMA_LIST_H
+
+/*
+ * struct scif_rma_req - Self Registration list RMA Request query
+ *
+ * @out_window - Returns the window if found
+ * @offset: Starting offset
+ * @nr_bytes: number of bytes
+ * @prot: protection requested i.e. read or write or both
+ * @type: Specify single, partial or multiple windows
+ * @head: Head of list on which to search
+ * @va_for_temp: VA for searching temporary cached windows
+ */
+struct scif_rma_req {
+ struct scif_window **out_window;
+ union {
+ s64 offset;
+ unsigned long va_for_temp;
+ };
+ size_t nr_bytes;
+ int prot;
+ enum scif_window_type type;
+ struct list_head *head;
+};
+
+/* Insert */
+void scif_insert_window(struct scif_window *window, struct list_head *head);
+void scif_insert_tcw(struct scif_window *window,
+ struct list_head *head);
+/* Query */
+int scif_query_window(struct scif_rma_req *request);
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
+/* Called from close to unregister all self windows */
+int scif_unregister_all_windows(scif_epd_t epd);
+void scif_unmap_all_windows(scif_epd_t epd);
+/* Traverse list and unregister */
+int scif_rma_list_unregister(struct scif_window *window, s64 offset,
+ int nr_pages);
+#endif /* SCIF_RMA_LIST_H */
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
index 2f30badc6ffd..1ee8e82ba710 100644
--- a/drivers/misc/sgi-gru/gruhandles.c
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -196,12 +196,6 @@ void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
start_instruction(tfh);
}
-void tfh_restart(struct gru_tlb_fault_handle *tfh)
-{
- tfh->opc = TFHOP_RESTART;
- start_instruction(tfh);
-}
-
void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
{
tfh->opc = TFHOP_USER_POLLING_MODE;
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
index 3f998b924d8f..3d7bd36a1c89 100644
--- a/drivers/misc/sgi-gru/gruhandles.h
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -524,7 +524,6 @@ int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
-void tfh_restart(struct gru_tlb_fault_handle *tfh);
void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
void tfh_exception(struct gru_tlb_fault_handle *tfh);
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
index a3700a56b8ff..313da3150262 100644
--- a/drivers/misc/sgi-gru/grukdump.c
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -78,11 +78,10 @@ static int gru_dump_tfm(struct gru_state *gru,
void __user *ubuf, void __user *ubufend)
{
struct gru_tlb_fault_map *tfm;
- int i, ret, bytes;
+ int i;
- bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
- if (bytes > ubufend - ubuf)
- ret = -EFBIG;
+ if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+ return -EFBIG;
for (i = 0; i < GRU_NUM_TFM; i++) {
tfm = get_tfm(gru->gs_gru_base_vaddr, i);
@@ -99,11 +98,10 @@ static int gru_dump_tgh(struct gru_state *gru,
void __user *ubuf, void __user *ubufend)
{
struct gru_tlb_global_handle *tgh;
- int i, ret, bytes;
+ int i;
- bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
- if (bytes > ubufend - ubuf)
- ret = -EFBIG;
+ if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+ return -EFBIG;
for (i = 0; i < GRU_NUM_TGH; i++) {
tgh = get_tgh(gru->gs_gru_base_vaddr, i);
@@ -196,7 +194,7 @@ int gru_dump_chiplet_request(unsigned long arg)
return -EFAULT;
/* Currently, only dump by gid is implemented */
- if (req.gid >= gru_max_gids || req.gid < 0)
+ if (req.gid >= gru_max_gids)
return -EINVAL;
gru = GID_TO_GRU(req.gid);
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 913de07e577c..967b9dd24fe9 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -160,7 +160,12 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
down_write(&bs->bs_kgts_sema);
if (!bs->bs_kgts) {
- bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+ do {
+ bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+ if (!IS_ERR(bs->bs_kgts))
+ break;
+ msleep(1);
+ } while (true);
bs->bs_kgts->ts_user_blade_id = blade_id;
}
kgts = bs->bs_kgts;
@@ -429,8 +434,8 @@ int gru_get_cb_exception_detail(void *cb,
return 0;
}
-char *gru_get_cb_exception_detail_str(int ret, void *cb,
- char *buf, int size)
+static char *gru_get_cb_exception_detail_str(int ret, void *cb,
+ char *buf, int size)
{
struct gru_control_block_status *gen = (void *)cb;
struct control_block_extended_exc_detail excdet;
@@ -505,7 +510,7 @@ int gru_wait_proc(void *cb)
return ret;
}
-void gru_abort(int ret, void *cb, char *str)
+static void gru_abort(int ret, void *cb, char *str)
{
char buf[GRU_EXC_STR_SIZE];
@@ -997,7 +1002,6 @@ static int quicktest1(unsigned long arg)
{
struct gru_message_queue_desc mqd;
void *p, *mq;
- unsigned long *dw;
int i, ret = -EIO;
char mes[GRU_CACHE_LINE_BYTES], *m;
@@ -1007,7 +1011,6 @@ static int quicktest1(unsigned long arg)
return -ENOMEM;
mq = ALIGNUP(p, 1024);
memset(mes, 0xee, sizeof(mes));
- dw = mq;
gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
for (i = 0; i < 6; i++) {
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index ae16c8cb4f3e..1525870f460a 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -930,6 +930,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct gru_thread_state *gts;
unsigned long paddr, vaddr;
+ unsigned long expires;
vaddr = (unsigned long)vmf->virtual_address;
gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -954,7 +955,8 @@ again:
mutex_unlock(&gts->ts_ctxlock);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
- if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
+ expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
+ if (time_before(expires, jiffies))
gru_steal_context(gts);
goto again;
}
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index 2129274ef7ab..e936d43895d2 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -306,19 +306,20 @@ struct gru_mm_struct *gru_register_mmu_notifier(void)
atomic_inc(&gms->ms_refcnt);
} else {
gms = kzalloc(sizeof(*gms), GFP_KERNEL);
- if (gms) {
- STAT(gms_alloc);
- spin_lock_init(&gms->ms_asid_lock);
- gms->ms_notifier.ops = &gru_mmuops;
- atomic_set(&gms->ms_refcnt, 1);
- init_waitqueue_head(&gms->ms_wait_queue);
- err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
- if (err)
- goto error;
- }
+ if (!gms)
+ return ERR_PTR(-ENOMEM);
+ STAT(gms_alloc);
+ spin_lock_init(&gms->ms_asid_lock);
+ gms->ms_notifier.ops = &gru_mmuops;
+ atomic_set(&gms->ms_refcnt, 1);
+ init_waitqueue_head(&gms->ms_wait_queue);
+ err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
+ if (err)
+ goto error;
}
- gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
- atomic_read(&gms->ms_refcnt));
+ if (gms)
+ gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+ atomic_read(&gms->ms_refcnt));
return gms;
error:
kfree(gms);
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
index 431e1dd528bc..736dae715dbf 100644
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -28,20 +28,144 @@
#define SRAM_GRANULARITY 32
+struct sram_partition {
+ void __iomem *base;
+
+ struct gen_pool *pool;
+ struct bin_attribute battr;
+ struct mutex lock;
+};
+
struct sram_dev {
struct device *dev;
void __iomem *virt_base;
struct gen_pool *pool;
struct clk *clk;
+
+ struct sram_partition *partition;
+ u32 partitions;
};
struct sram_reserve {
struct list_head list;
u32 start;
u32 size;
+ bool export;
+ bool pool;
+ const char *label;
};
+static ssize_t sram_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t pos, size_t count)
+{
+ struct sram_partition *part;
+
+ part = container_of(attr, struct sram_partition, battr);
+
+ mutex_lock(&part->lock);
+ memcpy_fromio(buf, part->base + pos, count);
+ mutex_unlock(&part->lock);
+
+ return count;
+}
+
+static ssize_t sram_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t pos, size_t count)
+{
+ struct sram_partition *part;
+
+ part = container_of(attr, struct sram_partition, battr);
+
+ mutex_lock(&part->lock);
+ memcpy_toio(part->base + pos, buf, count);
+ mutex_unlock(&part->lock);
+
+ return count;
+}
+
+static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block,
+ phys_addr_t start, struct sram_partition *part)
+{
+ int ret;
+
+ part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+ NUMA_NO_NODE, block->label);
+ if (IS_ERR(part->pool))
+ return PTR_ERR(part->pool);
+
+ ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start,
+ block->size, NUMA_NO_NODE);
+ if (ret < 0) {
+ dev_err(sram->dev, "failed to register subpool: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block,
+ phys_addr_t start, struct sram_partition *part)
+{
+ sysfs_bin_attr_init(&part->battr);
+ part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL,
+ "%llx.sram",
+ (unsigned long long)start);
+ if (!part->battr.attr.name)
+ return -ENOMEM;
+
+ part->battr.attr.mode = S_IRUSR | S_IWUSR;
+ part->battr.read = sram_read;
+ part->battr.write = sram_write;
+ part->battr.size = block->size;
+
+ return device_create_bin_file(sram->dev, &part->battr);
+}
+
+static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block,
+ phys_addr_t start)
+{
+ int ret;
+ struct sram_partition *part = &sram->partition[sram->partitions];
+
+ mutex_init(&part->lock);
+ part->base = sram->virt_base + block->start;
+
+ if (block->pool) {
+ ret = sram_add_pool(sram, block, start, part);
+ if (ret)
+ return ret;
+ }
+ if (block->export) {
+ ret = sram_add_export(sram, block, start, part);
+ if (ret)
+ return ret;
+ }
+ sram->partitions++;
+
+ return 0;
+}
+
+static void sram_free_partitions(struct sram_dev *sram)
+{
+ struct sram_partition *part;
+
+ if (!sram->partitions)
+ return;
+
+ part = &sram->partition[sram->partitions - 1];
+ for (; sram->partitions; sram->partitions--, part--) {
+ if (part->battr.size)
+ device_remove_bin_file(sram->dev, &part->battr);
+
+ if (part->pool &&
+ gen_pool_avail(part->pool) < gen_pool_size(part->pool))
+ dev_err(sram->dev, "removed pool while SRAM allocated\n");
+ }
+}
+
static int sram_reserve_cmp(void *priv, struct list_head *a,
struct list_head *b)
{
@@ -57,7 +181,8 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
unsigned long size, cur_start, cur_size;
struct sram_reserve *rblocks, *block;
struct list_head reserve_list;
- unsigned int nblocks;
+ unsigned int nblocks, exports = 0;
+ const char *label;
int ret = 0;
INIT_LIST_HEAD(&reserve_list);
@@ -69,7 +194,7 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
* after the reserved blocks from the dt are processed.
*/
nblocks = (np) ? of_get_available_child_count(np) + 1 : 1;
- rblocks = kmalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL);
+ rblocks = kzalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL);
if (!rblocks)
return -ENOMEM;
@@ -82,7 +207,6 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
dev_err(sram->dev,
"could not get address for node %s\n",
child->full_name);
- of_node_put(child);
goto err_chunks;
}
@@ -91,7 +215,6 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
"reserved block %s outside the sram area\n",
child->full_name);
ret = -EINVAL;
- of_node_put(child);
goto err_chunks;
}
@@ -99,11 +222,42 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
block->size = resource_size(&child_res);
list_add_tail(&block->list, &reserve_list);
- dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n",
- block->start, block->start + block->size);
+ if (of_find_property(child, "export", NULL))
+ block->export = true;
+
+ if (of_find_property(child, "pool", NULL))
+ block->pool = true;
+
+ if ((block->export || block->pool) && block->size) {
+ exports++;
+
+ label = NULL;
+ ret = of_property_read_string(child, "label", &label);
+ if (ret && ret != -EINVAL) {
+ dev_err(sram->dev,
+ "%s has invalid label name\n",
+ child->full_name);
+ goto err_chunks;
+ }
+ if (!label)
+ label = child->name;
+
+ block->label = devm_kstrdup(sram->dev,
+ label, GFP_KERNEL);
+ if (!block->label)
+ goto err_chunks;
+
+ dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n",
+ block->export ? "exported " : "", block->label,
+ block->start, block->start + block->size);
+ } else {
+ dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n",
+ block->start, block->start + block->size);
+ }
block++;
}
+ child = NULL;
/* the last chunk marks the end of the region */
rblocks[nblocks - 1].start = size;
@@ -112,8 +266,17 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
list_sort(NULL, &reserve_list, sram_reserve_cmp);
- cur_start = 0;
+ if (exports) {
+ sram->partition = devm_kzalloc(sram->dev,
+ exports * sizeof(*sram->partition),
+ GFP_KERNEL);
+ if (!sram->partition) {
+ ret = -ENOMEM;
+ goto err_chunks;
+ }
+ }
+ cur_start = 0;
list_for_each_entry(block, &reserve_list, list) {
/* can only happen if sections overlap */
if (block->start < cur_start) {
@@ -121,9 +284,19 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
"block at 0x%x starts after current offset 0x%lx\n",
block->start, cur_start);
ret = -EINVAL;
+ sram_free_partitions(sram);
goto err_chunks;
}
+ if ((block->export || block->pool) && block->size) {
+ ret = sram_add_partition(sram, block,
+ res->start + block->start);
+ if (ret) {
+ sram_free_partitions(sram);
+ goto err_chunks;
+ }
+ }
+
/* current start is in a reserved block, so continue after it */
if (block->start == cur_start) {
cur_start = block->start + block->size;
@@ -143,14 +316,19 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
ret = gen_pool_add_virt(sram->pool,
(unsigned long)sram->virt_base + cur_start,
res->start + cur_start, cur_size, -1);
- if (ret < 0)
+ if (ret < 0) {
+ sram_free_partitions(sram);
goto err_chunks;
+ }
/* next allocation after this reserved block */
cur_start = block->start + block->size;
}
err_chunks:
+ if (child)
+ of_node_put(child);
+
kfree(rblocks);
return ret;
@@ -213,6 +391,8 @@ static int sram_remove(struct platform_device *pdev)
{
struct sram_dev *sram = platform_get_drvdata(pdev);
+ sram_free_partitions(sram);
+
if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool))
dev_err(sram->dev, "removed while SRAM allocated\n");
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index c8c6a363069c..6e3af8b42cdd 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -460,6 +460,13 @@ static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb)
* - TTY layer when write's finished
* - st_write (in context of the protocol stack)
*/
+static void work_fn_write_wakeup(struct work_struct *work)
+{
+ struct st_data_s *st_gdata = container_of(work, struct st_data_s,
+ work_write_wakeup);
+
+ st_tx_wakeup((void *)st_gdata);
+}
void st_tx_wakeup(struct st_data_s *st_data)
{
struct sk_buff *skb;
@@ -812,8 +819,12 @@ static void st_tty_wakeup(struct tty_struct *tty)
/* don't do an wakeup for now */
clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
- /* call our internal wakeup */
- st_tx_wakeup((void *)st_gdata);
+ /*
+ * schedule the internal wakeup instead of calling directly to
+ * avoid lockup (port->lock needed in tty->ops->write is
+ * already taken here
+ */
+ schedule_work(&st_gdata->work_write_wakeup);
}
static void st_tty_flush_buffer(struct tty_struct *tty)
@@ -881,6 +892,9 @@ int st_core_init(struct st_data_s **core_data)
pr_err("unable to un-register ldisc");
return err;
}
+
+ INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup);
+
*core_data = st_gdata;
return 0;
}
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index ffb56340d0c7..89300870fefb 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -1,7 +1,7 @@
/*
* VMware Balloon driver.
*
- * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -37,16 +37,19 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
-MODULE_VERSION("1.3.0.0-k");
+MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");
@@ -57,12 +60,6 @@ MODULE_LICENSE("GPL");
*/
/*
- * Rate of allocating memory when there is no memory pressure
- * (driver performs non-sleeping allocations).
- */
-#define VMW_BALLOON_NOSLEEP_ALLOC_MAX 16384U
-
-/*
* Rates of memory allocaton when guest experiences memory pressure
* (driver performs sleeping allocations).
*/
@@ -71,13 +68,6 @@ MODULE_LICENSE("GPL");
#define VMW_BALLOON_RATE_ALLOC_INC 16U
/*
- * Rates for releasing pages while deflating balloon.
- */
-#define VMW_BALLOON_RATE_FREE_MIN 512U
-#define VMW_BALLOON_RATE_FREE_MAX 16384U
-#define VMW_BALLOON_RATE_FREE_INC 16U
-
-/*
* When guest is under memory pressure, use a reduced page allocation
* rate for next several cycles.
*/
@@ -99,9 +89,6 @@ MODULE_LICENSE("GPL");
*/
#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER)
-/* Maximum number of page allocations without yielding processor */
-#define VMW_BALLOON_YIELD_THRESHOLD 1024
-
/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED 16
@@ -116,17 +103,45 @@ enum vmwballoon_capabilities {
/*
* Bit 0 is reserved and not associated to any capability.
*/
- VMW_BALLOON_BASIC_CMDS = (1 << 1),
- VMW_BALLOON_BATCHED_CMDS = (1 << 2)
+ VMW_BALLOON_BASIC_CMDS = (1 << 1),
+ VMW_BALLOON_BATCHED_CMDS = (1 << 2),
+ VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3),
+ VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4),
};
-#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS)
+#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \
+ | VMW_BALLOON_BATCHED_CMDS \
+ | VMW_BALLOON_BATCHED_2M_CMDS \
+ | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
+
+#define VMW_BALLOON_2M_SHIFT (9)
+#define VMW_BALLOON_NUM_PAGE_SIZES (2)
+
+/*
+ * Backdoor commands availability:
+ *
+ * START, GET_TARGET and GUEST_ID are always available,
+ *
+ * VMW_BALLOON_BASIC_CMDS:
+ * LOCK and UNLOCK commands,
+ * VMW_BALLOON_BATCHED_CMDS:
+ * BATCHED_LOCK and BATCHED_UNLOCK commands.
+ * VMW BALLOON_BATCHED_2M_CMDS:
+ * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
+ * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
+ * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
+ */
+#define VMW_BALLOON_CMD_START 0
+#define VMW_BALLOON_CMD_GET_TARGET 1
+#define VMW_BALLOON_CMD_LOCK 2
+#define VMW_BALLOON_CMD_UNLOCK 3
+#define VMW_BALLOON_CMD_GUEST_ID 4
+#define VMW_BALLOON_CMD_BATCHED_LOCK 6
+#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7
+#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8
+#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9
+#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10
-#define VMW_BALLOON_CMD_START 0
-#define VMW_BALLOON_CMD_GET_TARGET 1
-#define VMW_BALLOON_CMD_LOCK 2
-#define VMW_BALLOON_CMD_UNLOCK 3
-#define VMW_BALLOON_CMD_GUEST_ID 4
/* error codes */
#define VMW_BALLOON_SUCCESS 0
@@ -142,18 +157,60 @@ enum vmwballoon_capabilities {
#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000)
-#define VMWARE_BALLOON_CMD(cmd, data, result) \
+/* Batch page description */
+
+/*
+ * Layout of a page in the batch page:
+ *
+ * +-------------+----------+--------+
+ * | | | |
+ * | Page number | Reserved | Status |
+ * | | | |
+ * +-------------+----------+--------+
+ * 64 PAGE_SHIFT 6 0
+ *
+ * The reserved field should be set to 0.
+ */
+#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64))
+#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1)
+#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1))
+
+struct vmballoon_batch_page {
+ u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
+};
+
+static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
+{
+ return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
+}
+
+static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
+ int idx)
+{
+ return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
+}
+
+static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
+ u64 pa)
+{
+ batch->pages[idx] = pa;
+}
+
+
+#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \
({ \
- unsigned long __status, __dummy1, __dummy2; \
+ unsigned long __status, __dummy1, __dummy2, __dummy3; \
__asm__ __volatile__ ("inl %%dx" : \
"=a"(__status), \
"=c"(__dummy1), \
"=d"(__dummy2), \
- "=b"(result) : \
+ "=b"(result), \
+ "=S" (__dummy3) : \
"0"(VMW_BALLOON_HV_MAGIC), \
"1"(VMW_BALLOON_CMD_##cmd), \
"2"(VMW_BALLOON_HV_PORT), \
- "3"(data) : \
+ "3"(arg1), \
+ "4" (arg2) : \
"memory"); \
if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \
result = __dummy1; \
@@ -164,27 +221,30 @@ enum vmwballoon_capabilities {
#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
unsigned int timer;
+ unsigned int doorbell;
/* allocation statistics */
- unsigned int alloc;
- unsigned int alloc_fail;
+ unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
unsigned int sleep_alloc;
unsigned int sleep_alloc_fail;
- unsigned int refused_alloc;
- unsigned int refused_free;
- unsigned int free;
+ unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
/* monitor operations */
- unsigned int lock;
- unsigned int lock_fail;
- unsigned int unlock;
- unsigned int unlock_fail;
+ unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
+ unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
unsigned int target;
unsigned int target_fail;
unsigned int start;
unsigned int start_fail;
unsigned int guest_type;
unsigned int guest_type_fail;
+ unsigned int doorbell_set;
+ unsigned int doorbell_unset;
};
#define STATS_INC(stat) (stat)++
@@ -192,14 +252,30 @@ struct vmballoon_stats {
#define STATS_INC(stat)
#endif
-struct vmballoon {
+struct vmballoon;
+struct vmballoon_ops {
+ void (*add_page)(struct vmballoon *b, int idx, struct page *p);
+ int (*lock)(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target);
+ int (*unlock)(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target);
+};
+
+struct vmballoon_page_size {
/* list of reserved physical pages */
struct list_head pages;
/* transient list of non-balloonable pages */
struct list_head refused_pages;
unsigned int n_refused_pages;
+};
+
+struct vmballoon {
+ struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
+
+ /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
+ unsigned supported_page_sizes;
/* balloon size in pages */
unsigned int size;
@@ -210,11 +286,18 @@ struct vmballoon {
/* adjustment rates (pages per second) */
unsigned int rate_alloc;
- unsigned int rate_free;
/* slowdown page allocations for next few cycles */
unsigned int slow_allocation_cycles;
+ unsigned long capabilities;
+
+ struct vmballoon_batch_page *batch_page;
+ unsigned int batch_max_pages;
+ struct page *page;
+
+ const struct vmballoon_ops *ops;
+
#ifdef CONFIG_DEBUG_FS
/* statistics */
struct vmballoon_stats stats;
@@ -226,6 +309,8 @@ struct vmballoon {
struct sysinfo sysinfo;
struct delayed_work dwork;
+
+ struct vmci_handle vmci_doorbell;
};
static struct vmballoon balloon;
@@ -234,20 +319,38 @@ static struct vmballoon balloon;
* Send "start" command to the host, communicating supported version
* of the protocol.
*/
-static bool vmballoon_send_start(struct vmballoon *b)
+static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
- unsigned long status, capabilities;
+ unsigned long status, capabilities, dummy = 0;
+ bool success;
STATS_INC(b->stats.start);
- status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_CAPABILITIES,
- capabilities);
- if (status == VMW_BALLOON_SUCCESS)
- return true;
+ status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
- pr_debug("%s - failed, hv returns %ld\n", __func__, status);
- STATS_INC(b->stats.start_fail);
- return false;
+ switch (status) {
+ case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
+ b->capabilities = capabilities;
+ success = true;
+ break;
+ case VMW_BALLOON_SUCCESS:
+ b->capabilities = VMW_BALLOON_BASIC_CMDS;
+ success = true;
+ break;
+ default:
+ success = false;
+ }
+
+ if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
+ b->supported_page_sizes = 2;
+ else
+ b->supported_page_sizes = 1;
+
+ if (!success) {
+ pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+ STATS_INC(b->stats.start_fail);
+ }
+ return success;
}
static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
@@ -273,9 +376,10 @@ static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
*/
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
- unsigned long status, dummy;
+ unsigned long status, dummy = 0;
- status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy);
+ status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
+ dummy);
STATS_INC(b->stats.guest_type);
@@ -287,6 +391,14 @@ static bool vmballoon_send_guest_id(struct vmballoon *b)
return false;
}
+static u16 vmballoon_page_size(bool is_2m_page)
+{
+ if (is_2m_page)
+ return 1 << VMW_BALLOON_2M_SHIFT;
+
+ return 1;
+}
+
/*
* Retrieve desired balloon size from the host.
*/
@@ -295,6 +407,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
unsigned long status;
unsigned long target;
unsigned long limit;
+ unsigned long dummy = 0;
u32 limit32;
/*
@@ -313,7 +426,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
/* update stats */
STATS_INC(b->stats.target);
- status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target);
+ status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
if (vmballoon_check_status(b, status)) {
*new_target = target;
return true;
@@ -330,23 +443,46 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
* check the return value and maybe submit a different page.
*/
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
- unsigned int *hv_status)
+ unsigned int *hv_status, unsigned int *target)
{
- unsigned long status, dummy;
+ unsigned long status, dummy = 0;
u32 pfn32;
pfn32 = (u32)pfn;
if (pfn32 != pfn)
return -1;
- STATS_INC(b->stats.lock);
+ STATS_INC(b->stats.lock[false]);
- *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy);
+ *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
if (vmballoon_check_status(b, status))
return 0;
pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
- STATS_INC(b->stats.lock_fail);
+ STATS_INC(b->stats.lock_fail[false]);
+ return 1;
+}
+
+static int vmballoon_send_batched_lock(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+ unsigned long status;
+ unsigned long pfn = page_to_pfn(b->page);
+
+ STATS_INC(b->stats.lock[is_2m_pages]);
+
+ if (is_2m_pages)
+ status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
+ *target);
+ else
+ status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
+ *target);
+
+ if (vmballoon_check_status(b, status))
+ return 0;
+
+ pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+ STATS_INC(b->stats.lock_fail[is_2m_pages]);
return 1;
}
@@ -354,26 +490,66 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
* Notify the host that guest intends to release given page back into
* the pool of available (to the guest) pages.
*/
-static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
+static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
+ unsigned int *target)
{
- unsigned long status, dummy;
+ unsigned long status, dummy = 0;
u32 pfn32;
pfn32 = (u32)pfn;
if (pfn32 != pfn)
return false;
- STATS_INC(b->stats.unlock);
+ STATS_INC(b->stats.unlock[false]);
- status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy);
+ status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
if (vmballoon_check_status(b, status))
return true;
pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
- STATS_INC(b->stats.unlock_fail);
+ STATS_INC(b->stats.unlock_fail[false]);
+ return false;
+}
+
+static bool vmballoon_send_batched_unlock(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+ unsigned long status;
+ unsigned long pfn = page_to_pfn(b->page);
+
+ STATS_INC(b->stats.unlock[is_2m_pages]);
+
+ if (is_2m_pages)
+ status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
+ *target);
+ else
+ status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
+ *target);
+
+ if (vmballoon_check_status(b, status))
+ return true;
+
+ pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+ STATS_INC(b->stats.unlock_fail[is_2m_pages]);
return false;
}
+static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
+{
+ if (is_2m_page)
+ return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
+
+ return alloc_page(flags);
+}
+
+static void vmballoon_free_page(struct page *page, bool is_2m_page)
+{
+ if (is_2m_page)
+ __free_pages(page, VMW_BALLOON_2M_SHIFT);
+ else
+ __free_page(page);
+}
+
/*
* Quickly release all pages allocated for the balloon. This function is
* called when host decides to "reset" balloon for one reason or another.
@@ -383,35 +559,31 @@ static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
static void vmballoon_pop(struct vmballoon *b)
{
struct page *page, *next;
- unsigned int count = 0;
-
- list_for_each_entry_safe(page, next, &b->pages, lru) {
- list_del(&page->lru);
- __free_page(page);
- STATS_INC(b->stats.free);
- b->size--;
-
- if (++count >= b->rate_free) {
- count = 0;
+ unsigned is_2m_pages;
+
+ for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+ is_2m_pages++) {
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+ u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+ list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+ list_del(&page->lru);
+ vmballoon_free_page(page, is_2m_pages);
+ STATS_INC(b->stats.free[is_2m_pages]);
+ b->size -= size_per_page;
cond_resched();
}
}
-}
-/*
- * Perform standard reset sequence by popping the balloon (in case it
- * is not empty) and then restarting protocol. This operation normally
- * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
- */
-static void vmballoon_reset(struct vmballoon *b)
-{
- /* free all pages, skipping monitor unlock */
- vmballoon_pop(b);
+ if (b->batch_page) {
+ vunmap(b->batch_page);
+ b->batch_page = NULL;
+ }
- if (vmballoon_send_start(b)) {
- b->reset_required = false;
- if (!vmballoon_send_guest_id(b))
- pr_err("failed to send guest ID to the host\n");
+ if (b->page) {
+ __free_page(b->page);
+ b->page = NULL;
}
}
@@ -420,17 +592,23 @@ static void vmballoon_reset(struct vmballoon *b)
* refuse list, those refused page are then released at the end of the
* inflation cycle.
*/
-static int vmballoon_lock_page(struct vmballoon *b, struct page *page)
+static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target)
{
int locked, hv_status;
+ struct page *page = b->page;
+ struct vmballoon_page_size *page_size = &b->page_sizes[false];
+
+ /* is_2m_pages can never happen as 2m pages support implies batching */
- locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status);
+ locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
+ target);
if (locked > 0) {
- STATS_INC(b->stats.refused_alloc);
+ STATS_INC(b->stats.refused_alloc[false]);
if (hv_status == VMW_BALLOON_ERROR_RESET ||
hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
- __free_page(page);
+ vmballoon_free_page(page, false);
return -EIO;
}
@@ -439,17 +617,17 @@ static int vmballoon_lock_page(struct vmballoon *b, struct page *page)
* and retry allocation, unless we already accumulated
* too many of them, in which case take a breather.
*/
- if (b->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
- b->n_refused_pages++;
- list_add(&page->lru, &b->refused_pages);
+ if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
+ page_size->n_refused_pages++;
+ list_add(&page->lru, &page_size->refused_pages);
} else {
- __free_page(page);
+ vmballoon_free_page(page, false);
}
return -EIO;
}
/* track allocated page */
- list_add(&page->lru, &b->pages);
+ list_add(&page->lru, &page_size->pages);
/* update balloon size */
b->size++;
@@ -457,21 +635,81 @@ static int vmballoon_lock_page(struct vmballoon *b, struct page *page)
return 0;
}
+static int vmballoon_lock_batched_page(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+ int locked, i;
+ u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+ locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
+ target);
+ if (locked > 0) {
+ for (i = 0; i < num_pages; i++) {
+ u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+ struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+
+ vmballoon_free_page(p, is_2m_pages);
+ }
+
+ return -EIO;
+ }
+
+ for (i = 0; i < num_pages; i++) {
+ u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+ struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+
+ locked = vmballoon_batch_get_status(b->batch_page, i);
+
+ switch (locked) {
+ case VMW_BALLOON_SUCCESS:
+ list_add(&p->lru, &page_size->pages);
+ b->size += size_per_page;
+ break;
+ case VMW_BALLOON_ERROR_PPN_PINNED:
+ case VMW_BALLOON_ERROR_PPN_INVALID:
+ if (page_size->n_refused_pages
+ < VMW_BALLOON_MAX_REFUSED) {
+ list_add(&p->lru, &page_size->refused_pages);
+ page_size->n_refused_pages++;
+ break;
+ }
+ /* Fallthrough */
+ case VMW_BALLOON_ERROR_RESET:
+ case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
+ vmballoon_free_page(p, is_2m_pages);
+ break;
+ default:
+ /* This should never happen */
+ WARN_ON_ONCE(true);
+ }
+ }
+
+ return 0;
+}
+
/*
* Release the page allocated for the balloon. Note that we first notify
* the host so it can make sure the page will be available for the guest
* to use, if needed.
*/
-static int vmballoon_release_page(struct vmballoon *b, struct page *page)
+static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
+ bool is_2m_pages, unsigned int *target)
{
- if (!vmballoon_send_unlock_page(b, page_to_pfn(page)))
- return -EIO;
+ struct page *page = b->page;
+ struct vmballoon_page_size *page_size = &b->page_sizes[false];
+
+ /* is_2m_pages can never happen as 2m pages support implies batching */
- list_del(&page->lru);
+ if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
+ list_add(&page->lru, &page_size->pages);
+ return -EIO;
+ }
/* deallocate page */
- __free_page(page);
- STATS_INC(b->stats.free);
+ vmballoon_free_page(page, false);
+ STATS_INC(b->stats.free[false]);
/* update balloon size */
b->size--;
@@ -479,21 +717,76 @@ static int vmballoon_release_page(struct vmballoon *b, struct page *page)
return 0;
}
+static int vmballoon_unlock_batched_page(struct vmballoon *b,
+ unsigned int num_pages, bool is_2m_pages,
+ unsigned int *target)
+{
+ int locked, i, ret = 0;
+ bool hv_success;
+ u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+ hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
+ target);
+ if (!hv_success)
+ ret = -EIO;
+
+ for (i = 0; i < num_pages; i++) {
+ u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+ struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+
+ locked = vmballoon_batch_get_status(b->batch_page, i);
+ if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
+ /*
+ * That page wasn't successfully unlocked by the
+ * hypervisor, re-add it to the list of pages owned by
+ * the balloon driver.
+ */
+ list_add(&p->lru, &page_size->pages);
+ } else {
+ /* deallocate page */
+ vmballoon_free_page(p, is_2m_pages);
+ STATS_INC(b->stats.free[is_2m_pages]);
+
+ /* update balloon size */
+ b->size -= size_per_page;
+ }
+ }
+
+ return ret;
+}
+
/*
* Release pages that were allocated while attempting to inflate the
* balloon but were refused by the host for one reason or another.
*/
-static void vmballoon_release_refused_pages(struct vmballoon *b)
+static void vmballoon_release_refused_pages(struct vmballoon *b,
+ bool is_2m_pages)
{
struct page *page, *next;
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
- list_for_each_entry_safe(page, next, &b->refused_pages, lru) {
+ list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
list_del(&page->lru);
- __free_page(page);
- STATS_INC(b->stats.refused_free);
+ vmballoon_free_page(page, is_2m_pages);
+ STATS_INC(b->stats.refused_free[is_2m_pages]);
}
- b->n_refused_pages = 0;
+ page_size->n_refused_pages = 0;
+}
+
+static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
+{
+ b->page = p;
+}
+
+static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
+ struct page *p)
+{
+ vmballoon_batch_set_pa(b->batch_page, idx,
+ (u64)page_to_pfn(p) << PAGE_SHIFT);
}
/*
@@ -503,12 +796,12 @@ static void vmballoon_release_refused_pages(struct vmballoon *b)
*/
static void vmballoon_inflate(struct vmballoon *b)
{
- unsigned int goal;
- unsigned int rate;
- unsigned int i;
+ unsigned rate;
unsigned int allocations = 0;
+ unsigned int num_pages = 0;
int error = 0;
gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
+ bool is_2m_pages;
pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
@@ -527,27 +820,50 @@ static void vmballoon_inflate(struct vmballoon *b)
* slowdown page allocations considerably.
*/
- goal = b->target - b->size;
/*
* Start with no sleep allocation rate which may be higher
* than sleeping allocation rate.
*/
- rate = b->slow_allocation_cycles ?
- b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX;
+ if (b->slow_allocation_cycles) {
+ rate = b->rate_alloc;
+ is_2m_pages = false;
+ } else {
+ rate = UINT_MAX;
+ is_2m_pages =
+ b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
+ }
- pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n",
- __func__, goal, rate, b->rate_alloc);
+ pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
+ __func__, b->target - b->size, rate, b->rate_alloc);
- for (i = 0; i < goal; i++) {
+ while (!b->reset_required &&
+ b->size + num_pages * vmballoon_page_size(is_2m_pages)
+ < b->target) {
struct page *page;
if (flags == VMW_PAGE_ALLOC_NOSLEEP)
- STATS_INC(b->stats.alloc);
+ STATS_INC(b->stats.alloc[is_2m_pages]);
else
STATS_INC(b->stats.sleep_alloc);
- page = alloc_page(flags);
+ page = vmballoon_alloc_page(flags, is_2m_pages);
if (!page) {
+ STATS_INC(b->stats.alloc_fail[is_2m_pages]);
+
+ if (is_2m_pages) {
+ b->ops->lock(b, num_pages, true, &b->target);
+
+ /*
+ * ignore errors from locking as we now switch
+ * to 4k pages and we might get different
+ * errors.
+ */
+
+ num_pages = 0;
+ is_2m_pages = false;
+ continue;
+ }
+
if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
/*
* CANSLEEP page allocation failed, so guest
@@ -559,7 +875,6 @@ static void vmballoon_inflate(struct vmballoon *b)
STATS_INC(b->stats.sleep_alloc_fail);
break;
}
- STATS_INC(b->stats.alloc_fail);
/*
* NOSLEEP page allocation failed, so the guest is
@@ -571,7 +886,7 @@ static void vmballoon_inflate(struct vmballoon *b)
*/
b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
- if (i >= b->rate_alloc)
+ if (allocations >= b->rate_alloc)
break;
flags = VMW_PAGE_ALLOC_CANSLEEP;
@@ -580,34 +895,40 @@ static void vmballoon_inflate(struct vmballoon *b)
continue;
}
- error = vmballoon_lock_page(b, page);
- if (error)
- break;
-
- if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) {
- cond_resched();
- allocations = 0;
+ b->ops->add_page(b, num_pages++, page);
+ if (num_pages == b->batch_max_pages) {
+ error = b->ops->lock(b, num_pages, is_2m_pages,
+ &b->target);
+ num_pages = 0;
+ if (error)
+ break;
}
- if (i >= rate) {
+ cond_resched();
+
+ if (allocations >= rate) {
/* We allocated enough pages, let's take a break. */
break;
}
}
+ if (num_pages > 0)
+ b->ops->lock(b, num_pages, is_2m_pages, &b->target);
+
/*
* We reached our goal without failures so try increasing
* allocation rate.
*/
- if (error == 0 && i >= b->rate_alloc) {
- unsigned int mult = i / b->rate_alloc;
+ if (error == 0 && allocations >= b->rate_alloc) {
+ unsigned int mult = allocations / b->rate_alloc;
b->rate_alloc =
min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
VMW_BALLOON_RATE_ALLOC_MAX);
}
- vmballoon_release_refused_pages(b);
+ vmballoon_release_refused_pages(b, true);
+ vmballoon_release_refused_pages(b, false);
}
/*
@@ -615,35 +936,176 @@ static void vmballoon_inflate(struct vmballoon *b)
*/
static void vmballoon_deflate(struct vmballoon *b)
{
- struct page *page, *next;
- unsigned int i = 0;
- unsigned int goal;
- int error;
+ unsigned is_2m_pages;
pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
- /* limit deallocation rate */
- goal = min(b->size - b->target, b->rate_free);
+ /* free pages to reach target */
+ for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
+ is_2m_pages++) {
+ struct page *page, *next;
+ unsigned int num_pages = 0;
+ struct vmballoon_page_size *page_size =
+ &b->page_sizes[is_2m_pages];
+
+ list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+ if (b->reset_required ||
+ (b->target > 0 &&
+ b->size - num_pages
+ * vmballoon_page_size(is_2m_pages)
+ < b->target + vmballoon_page_size(true)))
+ break;
+
+ list_del(&page->lru);
+ b->ops->add_page(b, num_pages++, page);
- pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free);
+ if (num_pages == b->batch_max_pages) {
+ int error;
- /* free pages to reach target */
- list_for_each_entry_safe(page, next, &b->pages, lru) {
- error = vmballoon_release_page(b, page);
- if (error) {
- /* quickly decrease rate in case of error */
- b->rate_free = max(b->rate_free / 2,
- VMW_BALLOON_RATE_FREE_MIN);
- return;
+ error = b->ops->unlock(b, num_pages,
+ is_2m_pages, &b->target);
+ num_pages = 0;
+ if (error)
+ return;
+ }
+
+ cond_resched();
}
- if (++i >= goal)
- break;
+ if (num_pages > 0)
+ b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
+ }
+}
+
+static const struct vmballoon_ops vmballoon_basic_ops = {
+ .add_page = vmballoon_add_page,
+ .lock = vmballoon_lock_page,
+ .unlock = vmballoon_unlock_page
+};
+
+static const struct vmballoon_ops vmballoon_batched_ops = {
+ .add_page = vmballoon_add_batched_page,
+ .lock = vmballoon_lock_batched_page,
+ .unlock = vmballoon_unlock_batched_page
+};
+
+static bool vmballoon_init_batching(struct vmballoon *b)
+{
+ b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
+ if (!b->page)
+ return false;
+
+ b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
+ if (!b->batch_page) {
+ __free_page(b->page);
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Receive notification and resize balloon
+ */
+static void vmballoon_doorbell(void *client_data)
+{
+ struct vmballoon *b = client_data;
+
+ STATS_INC(b->stats.doorbell);
+
+ mod_delayed_work(system_freezable_wq, &b->dwork, 0);
+}
+
+/*
+ * Clean up vmci doorbell
+ */
+static void vmballoon_vmci_cleanup(struct vmballoon *b)
+{
+ int error;
+
+ VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
+ VMCI_INVALID_ID, error);
+ STATS_INC(b->stats.doorbell_unset);
+
+ if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
+ vmci_doorbell_destroy(b->vmci_doorbell);
+ b->vmci_doorbell = VMCI_INVALID_HANDLE;
+ }
+}
+
+/*
+ * Initialize vmci doorbell, to get notified as soon as balloon changes
+ */
+static int vmballoon_vmci_init(struct vmballoon *b)
+{
+ int error = 0;
+
+ if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
+ error = vmci_doorbell_create(&b->vmci_doorbell,
+ VMCI_FLAG_DELAYED_CB,
+ VMCI_PRIVILEGE_FLAG_RESTRICTED,
+ vmballoon_doorbell, b);
+
+ if (error == VMCI_SUCCESS) {
+ VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
+ b->vmci_doorbell.context,
+ b->vmci_doorbell.resource, error);
+ STATS_INC(b->stats.doorbell_set);
+ }
+ }
+
+ if (error != 0) {
+ vmballoon_vmci_cleanup(b);
+
+ return -EIO;
}
- /* slowly increase rate if there were no errors */
- b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC,
- VMW_BALLOON_RATE_FREE_MAX);
+ return 0;
+}
+
+/*
+ * Perform standard reset sequence by popping the balloon (in case it
+ * is not empty) and then restarting protocol. This operation normally
+ * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
+ */
+static void vmballoon_reset(struct vmballoon *b)
+{
+ int error;
+
+ vmballoon_vmci_cleanup(b);
+
+ /* free all pages, skipping monitor unlock */
+ vmballoon_pop(b);
+
+ if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
+ return;
+
+ if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
+ b->ops = &vmballoon_batched_ops;
+ b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
+ if (!vmballoon_init_batching(b)) {
+ /*
+ * We failed to initialize batching, inform the monitor
+ * about it by sending a null capability.
+ *
+ * The guest will retry in one second.
+ */
+ vmballoon_send_start(b, 0);
+ return;
+ }
+ } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
+ b->ops = &vmballoon_basic_ops;
+ b->batch_max_pages = 1;
+ }
+
+ b->reset_required = false;
+
+ error = vmballoon_vmci_init(b);
+ if (error)
+ pr_err("failed to initialize vmci doorbell\n");
+
+ if (!vmballoon_send_guest_id(b))
+ pr_err("failed to send guest ID to the host\n");
}
/*
@@ -664,13 +1126,14 @@ static void vmballoon_work(struct work_struct *work)
if (b->slow_allocation_cycles > 0)
b->slow_allocation_cycles--;
- if (vmballoon_send_get_target(b, &target)) {
+ if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
/* update target, adjust size */
b->target = target;
if (b->size < target)
vmballoon_inflate(b);
- else if (b->size > target)
+ else if (target == 0 ||
+ b->size > target + vmballoon_page_size(true))
vmballoon_deflate(b);
}
@@ -692,6 +1155,14 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset)
struct vmballoon *b = f->private;
struct vmballoon_stats *stats = &b->stats;
+ /* format capabilities info */
+ seq_printf(f,
+ "balloon capabilities: %#4x\n"
+ "used capabilities: %#4lx\n"
+ "is resetting: %c\n",
+ VMW_BALLOON_CAPABILITIES, b->capabilities,
+ b->reset_required ? 'y' : 'n');
+
/* format size info */
seq_printf(f,
"target: %8d pages\n"
@@ -700,35 +1171,48 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset)
/* format rate info */
seq_printf(f,
- "rateNoSleepAlloc: %8d pages/sec\n"
- "rateSleepAlloc: %8d pages/sec\n"
- "rateFree: %8d pages/sec\n",
- VMW_BALLOON_NOSLEEP_ALLOC_MAX,
- b->rate_alloc, b->rate_free);
+ "rateSleepAlloc: %8d pages/sec\n",
+ b->rate_alloc);
seq_printf(f,
"\n"
"timer: %8u\n"
+ "doorbell: %8u\n"
"start: %8u (%4u failed)\n"
"guestType: %8u (%4u failed)\n"
+ "2m-lock: %8u (%4u failed)\n"
"lock: %8u (%4u failed)\n"
+ "2m-unlock: %8u (%4u failed)\n"
"unlock: %8u (%4u failed)\n"
"target: %8u (%4u failed)\n"
+ "prim2mAlloc: %8u (%4u failed)\n"
"primNoSleepAlloc: %8u (%4u failed)\n"
"primCanSleepAlloc: %8u (%4u failed)\n"
+ "prim2mFree: %8u\n"
"primFree: %8u\n"
+ "err2mAlloc: %8u\n"
"errAlloc: %8u\n"
- "errFree: %8u\n",
+ "err2mFree: %8u\n"
+ "errFree: %8u\n"
+ "doorbellSet: %8u\n"
+ "doorbellUnset: %8u\n",
stats->timer,
+ stats->doorbell,
stats->start, stats->start_fail,
stats->guest_type, stats->guest_type_fail,
- stats->lock, stats->lock_fail,
- stats->unlock, stats->unlock_fail,
+ stats->lock[true], stats->lock_fail[true],
+ stats->lock[false], stats->lock_fail[false],
+ stats->unlock[true], stats->unlock_fail[true],
+ stats->unlock[false], stats->unlock_fail[false],
stats->target, stats->target_fail,
- stats->alloc, stats->alloc_fail,
+ stats->alloc[true], stats->alloc_fail[true],
+ stats->alloc[false], stats->alloc_fail[false],
stats->sleep_alloc, stats->sleep_alloc_fail,
- stats->free,
- stats->refused_alloc, stats->refused_free);
+ stats->free[true],
+ stats->free[false],
+ stats->refused_alloc[true], stats->refused_alloc[false],
+ stats->refused_free[true], stats->refused_free[false],
+ stats->doorbell_set, stats->doorbell_unset);
return 0;
}
@@ -782,7 +1266,7 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
static int __init vmballoon_init(void)
{
int error;
-
+ unsigned is_2m_pages;
/*
* Check if we are running on VMware's hypervisor and bail out
* if we are not.
@@ -790,32 +1274,26 @@ static int __init vmballoon_init(void)
if (x86_hyper != &x86_hyper_vmware)
return -ENODEV;
- INIT_LIST_HEAD(&balloon.pages);
- INIT_LIST_HEAD(&balloon.refused_pages);
+ for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+ is_2m_pages++) {
+ INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
+ INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
+ }
/* initialize rates */
balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
- balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX;
INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
- /*
- * Start balloon.
- */
- if (!vmballoon_send_start(&balloon)) {
- pr_err("failed to send start command to the host\n");
- return -EIO;
- }
-
- if (!vmballoon_send_guest_id(&balloon)) {
- pr_err("failed to send guest ID to the host\n");
- return -EIO;
- }
-
error = vmballoon_debugfs_init(&balloon);
if (error)
return error;
+ balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
+ balloon.batch_page = NULL;
+ balloon.page = NULL;
+ balloon.reset_required = true;
+
queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
return 0;
@@ -824,6 +1302,7 @@ module_init(vmballoon_init);
static void __exit vmballoon_exit(void)
{
+ vmballoon_vmci_cleanup(&balloon);
cancel_delayed_work_sync(&balloon.dwork);
vmballoon_debugfs_exit(&balloon);
@@ -833,7 +1312,7 @@ static void __exit vmballoon_exit(void)
* Reset connection before deallocating memory to avoid potential for
* additional spurious resets from guest touching deallocated pages.
*/
- vmballoon_send_start(&balloon);
+ vmballoon_send_start(&balloon, 0);
vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c
index 822665245588..8a4b6bbe1bee 100644
--- a/drivers/misc/vmw_vmci/vmci_datagram.c
+++ b/drivers/misc/vmw_vmci/vmci_datagram.c
@@ -276,11 +276,10 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
}
/* We make a copy to enqueue. */
- new_dg = kmalloc(dg_size, GFP_KERNEL);
+ new_dg = kmemdup(dg, dg_size, GFP_KERNEL);
if (new_dg == NULL)
return VMCI_ERROR_NO_MEM;
- memcpy(new_dg, dg, dg_size);
retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg);
if (retval < VMCI_SUCCESS) {
kfree(new_dg);