summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common/habanalabs_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common/habanalabs_drv.c')
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c82
1 files changed, 63 insertions, 19 deletions
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 37edb69a7255..f733ead605e7 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -54,10 +54,15 @@ MODULE_PARM_DESC(boot_error_status_mask,
#define PCI_IDS_GAUDI 0x1000
#define PCI_IDS_GAUDI_SEC 0x1010
+#define PCI_IDS_GAUDI2 0x1020
+#define PCI_IDS_GAUDI2_SEC 0x1030
+
static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
+ { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
+ { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
@@ -84,6 +89,12 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GAUDI_SEC:
asic_type = ASIC_GAUDI_SEC;
break;
+ case PCI_IDS_GAUDI2:
+ asic_type = ASIC_GAUDI2;
+ break;
+ case PCI_IDS_GAUDI2_SEC:
+ asic_type = ASIC_GAUDI2_SEC;
+ break;
default:
asic_type = ASIC_INVALID;
break;
@@ -96,6 +107,7 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
{
switch (asic_type) {
case ASIC_GAUDI_SEC:
+ case ASIC_GAUDI2_SEC:
return true;
default:
return false;
@@ -137,6 +149,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
mutex_init(&hpriv->notifier_event.lock);
mutex_init(&hpriv->restore_phase_mutex);
+ mutex_init(&hpriv->ctx_lock);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
@@ -152,7 +165,8 @@ int hl_device_open(struct inode *inode, struct file *filp)
"Can't open %s because it is %s\n",
dev_name(hdev->dev), hdev->status[status]);
- if (status == HL_DEVICE_STATUS_IN_RESET)
+ if (status == HL_DEVICE_STATUS_IN_RESET ||
+ status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
rc = -EAGAIN;
else
rc = -EPERM;
@@ -195,8 +209,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv);
- atomic_set(&hdev->last_error.cs_timeout.write_disable, 0);
- atomic_set(&hdev->last_error.razwi.write_disable, 0);
+ atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
+ atomic_set(&hdev->last_error.razwi.write_enable, 1);
+ hdev->last_error.undef_opcode.write_enable = true;
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
@@ -209,6 +224,7 @@ out_err:
hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
+ mutex_destroy(&hpriv->ctx_lock);
mutex_destroy(&hpriv->restore_phase_mutex);
mutex_destroy(&hpriv->notifier_event.lock);
put_pid(hpriv->taskpid);
@@ -277,43 +293,56 @@ out_err:
static void set_driver_behavior_per_device(struct hl_device *hdev)
{
- hdev->pldm = 0;
+ hdev->nic_ports_mask = 0;
hdev->fw_components = FW_TYPE_ALL_TYPES;
+ hdev->mmu_enable = MMU_EN_ALL;
hdev->cpu_queues_enable = 1;
- hdev->heartbeat = 1;
- hdev->mmu_enable = 1;
- hdev->sram_scrambler_enable = 1;
- hdev->dram_scrambler_enable = 1;
- hdev->bmc_enable = 1;
+ hdev->pldm = 0;
hdev->hard_reset_on_fw_events = 1;
+ hdev->bmc_enable = 1;
hdev->reset_on_preboot_fail = 1;
- hdev->reset_if_device_not_idle = 1;
-
- hdev->reset_pcilink = 0;
- hdev->axi_drain = 0;
+ hdev->heartbeat = 1;
}
static void copy_kernel_module_params_to_device(struct hl_device *hdev)
{
+ hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
+
hdev->major = hl_major;
hdev->memory_scrub = memory_scrub;
hdev->reset_on_lockup = reset_on_lockup;
hdev->boot_error_status_mask = boot_error_status_mask;
+}
- if (timeout_locked)
- hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
- else
- hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+static void fixup_device_params_per_asic(struct hl_device *hdev)
+{
+ switch (hdev->asic_type) {
+ case ASIC_GOYA:
+ case ASIC_GAUDI:
+ case ASIC_GAUDI_SEC:
+ hdev->reset_upon_device_release = 0;
+ break;
+ default:
+ hdev->reset_upon_device_release = 1;
+ break;
+ }
}
static int fixup_device_params(struct hl_device *hdev)
{
- hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
+ int tmp_timeout;
+
+ tmp_timeout = timeout_locked;
hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
+ if (tmp_timeout)
+ hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000);
+ else
+ hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
hdev->stop_on_err = true;
hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
@@ -321,6 +350,18 @@ static int fixup_device_params(struct hl_device *hdev)
/* Enable only after the initialization of the device */
hdev->disabled = true;
+ if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) &&
+ (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) {
+ pr_err("Preboot must be set along with other components");
+ return -EINVAL;
+ }
+
+ /* If CPU queues not enabled, no way to do heartbeat */
+ if (!hdev->cpu_queues_enable)
+ hdev->heartbeat = 0;
+
+ fixup_device_params_per_asic(hdev);
+
return 0;
}
@@ -345,7 +386,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
if (!hdev)
return -ENOMEM;
- /* can be NULL in case of simulator device */
+ /* Will be NULL in case of simulator device */
hdev->pdev = pdev;
/* Assign status description string */
@@ -355,6 +396,9 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
+ strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
+ "in reset after device release", HL_STR_MAX);
+
/* First, we must find out which ASIC are we handling. This is needed
* to configure the behavior of the driver (kernel parameters)