Diffstat (limited to 'drivers/gpu/drm/panfrost')
21 files changed, 2558 insertions, 1008 deletions
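The patch below adds two user-visible pieces of buffer-object memory management: CREATE_BO gains the PANFROST_BO_NOEXEC and PANFROST_BO_HEAP flags, and a new MADVISE ioctl lets userspace mark idle BOs purgeable so the new GEM shrinker can reclaim their pages under memory pressure. As a rough sketch of how a client drives this interface (not part of the patch; it assumes libdrm's drmIoctl(), the kernel uapi header installed as <drm/panfrost_drm.h>, and a Panfrost GPU exposed at /dev/dri/renderD128 — the node path varies per system):

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <xf86drm.h>
#include <drm/panfrost_drm.h>

int main(void)
{
	struct drm_panfrost_create_bo create = {
		.size = 1 << 20,             /* 1 MiB of shmem-backed memory */
		.flags = PANFROST_BO_NOEXEC, /* heap BOs would also need PANFROST_BO_HEAP */
	};
	struct drm_panfrost_madvise madv;
	int fd;

	fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
	if (fd < 0)
		err(1, "open render node");

	if (drmIoctl(fd, DRM_IOCTL_PANFROST_CREATE_BO, &create))
		err(1, "CREATE_BO");

	/* The buffer is idle: allow the GEM shrinker to purge its pages
	 * when the system comes under memory pressure. */
	madv = (struct drm_panfrost_madvise){
		.handle = create.handle,
		.madv = PANFROST_MADV_DONTNEED,
	};
	if (drmIoctl(fd, DRM_IOCTL_PANFROST_MADVISE, &madv))
		err(1, "MADVISE(DONTNEED)");

	/* Before reusing the buffer, flip it back and check whether the
	 * backing pages survived; retained == 0 means the contents were
	 * purged and userspace must regenerate them. */
	madv.madv = PANFROST_MADV_WILLNEED;
	if (drmIoctl(fd, DRM_IOCTL_PANFROST_MADVISE, &madv))
		err(1, "MADVISE(WILLNEED)");

	printf("backing store %s\n", madv.retained ? "retained" : "purged");
	return 0;
}

The out-parameter `retained` mirrors what drm_gem_shmem_madvise() reports in the panfrost_ioctl_madvise() hunk of panfrost_drv.c below.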
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile index ecf0864cb515..b71935862417 100644 --- a/drivers/gpu/drm/panfrost/Makefile +++ b/drivers/gpu/drm/panfrost/Makefile @@ -5,6 +5,7 @@ panfrost-y := \ panfrost_device.o \ panfrost_devfreq.o \ panfrost_gem.o \ + panfrost_gem_shrinker.o \ panfrost_gpu.o \ panfrost_job.o \ panfrost_mmu.o \ diff --git a/drivers/gpu/drm/panfrost/TODO b/drivers/gpu/drm/panfrost/TODO index c2e44add37d8..8c811a9e683b 100644 --- a/drivers/gpu/drm/panfrost/TODO +++ b/drivers/gpu/drm/panfrost/TODO @@ -6,22 +6,9 @@ - Bifrost specific feature and issue handling - Coherent DMA support -- Support for 2MB pages. The io-pgtable code already supports this. Finishing - support involves either copying or adapting the iommu API to handle passing - aligned addresses and sizes to the io-pgtable code. - -- Per FD address space support. The h/w supports multiple addresses spaces. - The hard part is handling when more address spaces are needed than what - the h/w provides. - -- Support pinning pages on demand (GPU page faults). - - Support userspace controlled GPU virtual addresses. Needed for Vulkan. (Tomeu) -- Support for madvise and a shrinker. - - Compute job support. So called 'compute only' jobs need to be plumbed up to userspace. -- Performance counter support. (Boris) - +- Support core dump on job failure diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index db798532b0b6..194af7f607a6 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -1,221 +1,224 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright 2019 Collabora ltd. */ + +#include <linux/clk.h> #include <linux/devfreq.h> +#include <linux/devfreq_cooling.h> #include <linux/platform_device.h> #include <linux/pm_opp.h> -#include <linux/clk.h> -#include <linux/regulator/consumer.h> #include "panfrost_device.h" #include "panfrost_devfreq.h" -#include "panfrost_features.h" -#include "panfrost_issues.h" -#include "panfrost_gpu.h" -#include "panfrost_regs.h" -static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev, int slot); +static void panfrost_devfreq_update_utilization(struct panfrost_devfreq *pfdevfreq) +{ + ktime_t now, last; + + now = ktime_get(); + last = pfdevfreq->time_last_update; + + if (pfdevfreq->busy_count > 0) + pfdevfreq->busy_time += ktime_sub(now, last); + else + pfdevfreq->idle_time += ktime_sub(now, last); + + pfdevfreq->time_last_update = now; +} static int panfrost_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { - struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev)); struct dev_pm_opp *opp; - unsigned long old_clk_rate = pfdev->devfreq.cur_freq; - unsigned long target_volt, target_rate; - int err; opp = devfreq_recommended_opp(dev, freq, flags); if (IS_ERR(opp)) return PTR_ERR(opp); - - target_rate = dev_pm_opp_get_freq(opp); - target_volt = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); - if (old_clk_rate == target_rate) - return 0; - - /* - * If frequency scaling from low to high, adjust voltage first. - * If frequency scaling from high to low, adjust frequency first. 
- */ - if (old_clk_rate < target_rate) { - err = regulator_set_voltage(pfdev->regulator, target_volt, - target_volt); - if (err) { - dev_err(dev, "Cannot set voltage %lu uV\n", - target_volt); - return err; - } - } - - err = clk_set_rate(pfdev->clock, target_rate); - if (err) { - dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate, - err); - regulator_set_voltage(pfdev->regulator, pfdev->devfreq.cur_volt, - pfdev->devfreq.cur_volt); - return err; - } - - if (old_clk_rate > target_rate) { - err = regulator_set_voltage(pfdev->regulator, target_volt, - target_volt); - if (err) - dev_err(dev, "Cannot set voltage %lu uV\n", target_volt); - } - - pfdev->devfreq.cur_freq = target_rate; - pfdev->devfreq.cur_volt = target_volt; - - return 0; + return dev_pm_opp_set_rate(dev, *freq); } -static void panfrost_devfreq_reset(struct panfrost_device *pfdev) +static void panfrost_devfreq_reset(struct panfrost_devfreq *pfdevfreq) { - ktime_t now = ktime_get(); - int i; - - for (i = 0; i < NUM_JOB_SLOTS; i++) { - pfdev->devfreq.slot[i].busy_time = 0; - pfdev->devfreq.slot[i].idle_time = 0; - pfdev->devfreq.slot[i].time_last_update = now; - } + pfdevfreq->busy_time = 0; + pfdevfreq->idle_time = 0; + pfdevfreq->time_last_update = ktime_get(); } static int panfrost_devfreq_get_dev_status(struct device *dev, struct devfreq_dev_status *status) { - struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev)); - int i; - - for (i = 0; i < NUM_JOB_SLOTS; i++) { - panfrost_devfreq_update_utilization(pfdev, i); - } + struct panfrost_device *pfdev = dev_get_drvdata(dev); + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + unsigned long irqflags; status->current_frequency = clk_get_rate(pfdev->clock); - status->total_time = ktime_to_ns(ktime_add(pfdev->devfreq.slot[0].busy_time, - pfdev->devfreq.slot[0].idle_time)); - status->busy_time = 0; - for (i = 0; i < NUM_JOB_SLOTS; i++) { - status->busy_time += ktime_to_ns(pfdev->devfreq.slot[i].busy_time); - } + spin_lock_irqsave(&pfdevfreq->lock, irqflags); - /* We're scheduling only to one core atm, so don't divide for now */ - /* status->busy_time /= NUM_JOB_SLOTS; */ + panfrost_devfreq_update_utilization(pfdevfreq); - panfrost_devfreq_reset(pfdev); + status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time, + pfdevfreq->idle_time)); - dev_dbg(pfdev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n", status->busy_time, - status->total_time, - status->busy_time / (status->total_time / 100), - status->current_frequency / 1000 / 1000); + status->busy_time = ktime_to_ns(pfdevfreq->busy_time); - return 0; -} + panfrost_devfreq_reset(pfdevfreq); -static int panfrost_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) -{ - struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev)); + spin_unlock_irqrestore(&pfdevfreq->lock, irqflags); - *freq = pfdev->devfreq.cur_freq; + dev_dbg(pfdev->dev, "busy %lu total %lu %lu %% freq %lu MHz\n", + status->busy_time, status->total_time, + status->busy_time / (status->total_time / 100), + status->current_frequency / 1000 / 1000); return 0; } static struct devfreq_dev_profile panfrost_devfreq_profile = { + .timer = DEVFREQ_TIMER_DELAYED, .polling_ms = 50, /* ~3 frames */ .target = panfrost_devfreq_target, .get_dev_status = panfrost_devfreq_get_dev_status, - .get_cur_freq = panfrost_devfreq_get_cur_freq, }; int panfrost_devfreq_init(struct panfrost_device *pfdev) { int ret; struct dev_pm_opp *opp; - - if (!pfdev->regulator) + unsigned long cur_freq; + struct device *dev = 
&pfdev->pdev->dev; + struct devfreq *devfreq; + struct thermal_cooling_device *cooling; + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + + if (pfdev->comp->num_supplies > 1) { + /* + * GPUs with more than 1 supply require platform-specific handling: + * continue without devfreq + */ + DRM_DEV_INFO(dev, "More than 1 supply is not supported yet\n"); return 0; + } - ret = dev_pm_opp_of_add_table(&pfdev->pdev->dev); - if (ret == -ENODEV) /* Optional, continue without devfreq */ - return 0; - else if (ret) + ret = devm_pm_opp_set_regulators(dev, pfdev->comp->supply_names, + pfdev->comp->num_supplies); + if (ret) { + /* Continue if the optional regulator is missing */ + if (ret != -ENODEV) { + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); + return ret; + } + } + + ret = devm_pm_opp_of_add_table(dev); + if (ret) { + /* Optional, continue without devfreq */ + if (ret == -ENODEV) + ret = 0; return ret; + } + pfdevfreq->opp_of_table_added = true; + + spin_lock_init(&pfdevfreq->lock); - panfrost_devfreq_reset(pfdev); + panfrost_devfreq_reset(pfdevfreq); - pfdev->devfreq.cur_freq = clk_get_rate(pfdev->clock); + cur_freq = clk_get_rate(pfdev->clock); - opp = devfreq_recommended_opp(&pfdev->pdev->dev, &pfdev->devfreq.cur_freq, 0); + opp = devfreq_recommended_opp(dev, &cur_freq, 0); if (IS_ERR(opp)) return PTR_ERR(opp); - panfrost_devfreq_profile.initial_freq = pfdev->devfreq.cur_freq; + panfrost_devfreq_profile.initial_freq = cur_freq; dev_pm_opp_put(opp); - pfdev->devfreq.devfreq = devm_devfreq_add_device(&pfdev->pdev->dev, - &panfrost_devfreq_profile, "simple_ondemand", NULL); - if (IS_ERR(pfdev->devfreq.devfreq)) { - DRM_DEV_ERROR(&pfdev->pdev->dev, "Couldn't initialize GPU devfreq\n"); - ret = PTR_ERR(pfdev->devfreq.devfreq); - pfdev->devfreq.devfreq = NULL; - return ret; + /* + * Setup default thresholds for the simple_ondemand governor. + * The values are chosen based on experiments. 
+ */ + pfdevfreq->gov_data.upthreshold = 45; + pfdevfreq->gov_data.downdifferential = 5; + + devfreq = devm_devfreq_add_device(dev, &panfrost_devfreq_profile, + DEVFREQ_GOV_SIMPLE_ONDEMAND, + &pfdevfreq->gov_data); + if (IS_ERR(devfreq)) { + DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n"); + return PTR_ERR(devfreq); } + pfdevfreq->devfreq = devfreq; + + cooling = devfreq_cooling_em_register(devfreq, NULL); + if (IS_ERR(cooling)) + DRM_DEV_INFO(dev, "Failed to register cooling device\n"); + else + pfdevfreq->cooling = cooling; return 0; } +void panfrost_devfreq_fini(struct panfrost_device *pfdev) +{ + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + + if (pfdevfreq->cooling) { + devfreq_cooling_unregister(pfdevfreq->cooling); + pfdevfreq->cooling = NULL; + } +} + void panfrost_devfreq_resume(struct panfrost_device *pfdev) { - int i; + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; - if (!pfdev->devfreq.devfreq) + if (!pfdevfreq->devfreq) return; - panfrost_devfreq_reset(pfdev); - for (i = 0; i < NUM_JOB_SLOTS; i++) - pfdev->devfreq.slot[i].busy = false; + panfrost_devfreq_reset(pfdevfreq); - devfreq_resume_device(pfdev->devfreq.devfreq); + devfreq_resume_device(pfdevfreq->devfreq); } void panfrost_devfreq_suspend(struct panfrost_device *pfdev) { - if (!pfdev->devfreq.devfreq) + struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq; + + if (!pfdevfreq->devfreq) return; - devfreq_suspend_device(pfdev->devfreq.devfreq); + devfreq_suspend_device(pfdevfreq->devfreq); } -static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev, int slot) +void panfrost_devfreq_record_busy(struct panfrost_devfreq *pfdevfreq) { - struct panfrost_devfreq_slot *devfreq_slot = &pfdev->devfreq.slot[slot]; - ktime_t now; - ktime_t last; + unsigned long irqflags; - if (!pfdev->devfreq.devfreq) + if (!pfdevfreq->devfreq) return; - now = ktime_get(); - last = pfdev->devfreq.slot[slot].time_last_update; + spin_lock_irqsave(&pfdevfreq->lock, irqflags); - /* If we last recorded a transition to busy, we have been idle since */ - if (devfreq_slot->busy) - pfdev->devfreq.slot[slot].busy_time += ktime_sub(now, last); - else - pfdev->devfreq.slot[slot].idle_time += ktime_sub(now, last); + panfrost_devfreq_update_utilization(pfdevfreq); + + pfdevfreq->busy_count++; - pfdev->devfreq.slot[slot].time_last_update = now; + spin_unlock_irqrestore(&pfdevfreq->lock, irqflags); } -/* The job scheduler is expected to call this at every transition busy <-> idle */ -void panfrost_devfreq_record_transition(struct panfrost_device *pfdev, int slot) +void panfrost_devfreq_record_idle(struct panfrost_devfreq *pfdevfreq) { - struct panfrost_devfreq_slot *devfreq_slot = &pfdev->devfreq.slot[slot]; + unsigned long irqflags; + + if (!pfdevfreq->devfreq) + return; + + spin_lock_irqsave(&pfdevfreq->lock, irqflags); + + panfrost_devfreq_update_utilization(pfdevfreq); + + WARN_ON(--pfdevfreq->busy_count < 0); - panfrost_devfreq_update_utilization(pfdev, slot); - devfreq_slot->busy = !devfreq_slot->busy; + spin_unlock_irqrestore(&pfdevfreq->lock, irqflags); } diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.h b/drivers/gpu/drm/panfrost/panfrost_devfreq.h index eb999531ed90..1514c1f9d91c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.h +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.h @@ -4,11 +4,39 @@ #ifndef __PANFROST_DEVFREQ_H__ #define __PANFROST_DEVFREQ_H__ +#include <linux/devfreq.h> +#include <linux/spinlock.h> +#include <linux/ktime.h> + +struct devfreq; +struct thermal_cooling_device; + 
+struct panfrost_device; + +struct panfrost_devfreq { + struct devfreq *devfreq; + struct thermal_cooling_device *cooling; + struct devfreq_simple_ondemand_data gov_data; + bool opp_of_table_added; + + ktime_t busy_time; + ktime_t idle_time; + ktime_t time_last_update; + int busy_count; + /* + * Protect busy_time, idle_time, time_last_update and busy_count + * because these can be updated concurrently between multiple jobs. + */ + spinlock_t lock; +}; + int panfrost_devfreq_init(struct panfrost_device *pfdev); +void panfrost_devfreq_fini(struct panfrost_device *pfdev); void panfrost_devfreq_resume(struct panfrost_device *pfdev); void panfrost_devfreq_suspend(struct panfrost_device *pfdev); -void panfrost_devfreq_record_transition(struct panfrost_device *pfdev, int slot); +void panfrost_devfreq_record_busy(struct panfrost_devfreq *devfreq); +void panfrost_devfreq_record_idle(struct panfrost_devfreq *devfreq); #endif /* __PANFROST_DEVFREQ_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index 8a111d7c0200..7f51a4682ccb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -5,7 +5,7 @@ #include <linux/clk.h> #include <linux/reset.h> #include <linux/platform_device.h> -#include <linux/pm_runtime.h> +#include <linux/pm_domain.h> #include <linux/regulator/consumer.h> #include "panfrost_device.h" @@ -18,19 +18,13 @@ static int panfrost_reset_init(struct panfrost_device *pfdev) { - int err; - - pfdev->rstc = devm_reset_control_array_get(pfdev->dev, false, true); + pfdev->rstc = devm_reset_control_array_get_optional_exclusive(pfdev->dev); if (IS_ERR(pfdev->rstc)) { dev_err(pfdev->dev, "get reset failed %ld\n", PTR_ERR(pfdev->rstc)); return PTR_ERR(pfdev->rstc); } - err = reset_control_deassert(pfdev->rstc); - if (err) - return err; - - return 0; + return reset_control_deassert(pfdev->rstc); } static void panfrost_reset_fini(struct panfrost_device *pfdev) @@ -60,7 +54,8 @@ static int panfrost_clk_init(struct panfrost_device *pfdev) if (IS_ERR(pfdev->bus_clock)) { dev_err(pfdev->dev, "get bus_clock failed %ld\n", PTR_ERR(pfdev->bus_clock)); - return PTR_ERR(pfdev->bus_clock); + err = PTR_ERR(pfdev->bus_clock); + goto disable_clock; } if (pfdev->bus_clock) { @@ -88,21 +83,31 @@ static void panfrost_clk_fini(struct panfrost_device *pfdev) static int panfrost_regulator_init(struct panfrost_device *pfdev) { - int ret; - - pfdev->regulator = devm_regulator_get_optional(pfdev->dev, "mali"); - if (IS_ERR(pfdev->regulator)) { - ret = PTR_ERR(pfdev->regulator); - pfdev->regulator = NULL; - if (ret == -ENODEV) - return 0; - dev_err(pfdev->dev, "failed to get regulator: %d\n", ret); + int ret, i; + + pfdev->regulators = devm_kcalloc(pfdev->dev, pfdev->comp->num_supplies, + sizeof(*pfdev->regulators), + GFP_KERNEL); + if (!pfdev->regulators) + return -ENOMEM; + + for (i = 0; i < pfdev->comp->num_supplies; i++) + pfdev->regulators[i].supply = pfdev->comp->supply_names[i]; + + ret = devm_regulator_bulk_get(pfdev->dev, + pfdev->comp->num_supplies, + pfdev->regulators); + if (ret < 0) { + if (ret != -EPROBE_DEFER) + dev_err(pfdev->dev, "failed to get regulators: %d\n", + ret); return ret; } - ret = regulator_enable(pfdev->regulator); + ret = regulator_bulk_enable(pfdev->comp->num_supplies, + pfdev->regulators); if (ret < 0) { - dev_err(pfdev->dev, "failed to enable regulator: %d\n", ret); + dev_err(pfdev->dev, "failed to enable regulators: %d\n", ret); return ret; } @@ -111,20 +116,94 @@ static int 
panfrost_regulator_init(struct panfrost_device *pfdev) static void panfrost_regulator_fini(struct panfrost_device *pfdev) { - if (pfdev->regulator) - regulator_disable(pfdev->regulator); + if (!pfdev->regulators) + return; + + regulator_bulk_disable(pfdev->comp->num_supplies, pfdev->regulators); +} + +static void panfrost_pm_domain_fini(struct panfrost_device *pfdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pfdev->pm_domain_devs); i++) { + if (!pfdev->pm_domain_devs[i]) + break; + + if (pfdev->pm_domain_links[i]) + device_link_del(pfdev->pm_domain_links[i]); + + dev_pm_domain_detach(pfdev->pm_domain_devs[i], true); + } +} + +static int panfrost_pm_domain_init(struct panfrost_device *pfdev) +{ + int err; + int i, num_domains; + + num_domains = of_count_phandle_with_args(pfdev->dev->of_node, + "power-domains", + "#power-domain-cells"); + + /* + * A single domain is handled by the core, and, if only a single + * power domain is requested, the property is optional. + */ + if (num_domains < 2 && pfdev->comp->num_pm_domains < 2) + return 0; + + if (num_domains != pfdev->comp->num_pm_domains) { + dev_err(pfdev->dev, + "Incorrect number of power domains: %d provided, %d needed\n", + num_domains, pfdev->comp->num_pm_domains); + return -EINVAL; + } + + if (WARN(num_domains > ARRAY_SIZE(pfdev->pm_domain_devs), + "Too many power domains in compatible structure.\n")) + return -EINVAL; + + for (i = 0; i < num_domains; i++) { + pfdev->pm_domain_devs[i] = + dev_pm_domain_attach_by_name(pfdev->dev, + pfdev->comp->pm_domain_names[i]); + if (IS_ERR_OR_NULL(pfdev->pm_domain_devs[i])) { + err = PTR_ERR(pfdev->pm_domain_devs[i]) ? : -ENODATA; + pfdev->pm_domain_devs[i] = NULL; + dev_err(pfdev->dev, + "failed to get pm-domain %s(%d): %d\n", + pfdev->comp->pm_domain_names[i], i, err); + goto err; + } + + pfdev->pm_domain_links[i] = device_link_add(pfdev->dev, + pfdev->pm_domain_devs[i], DL_FLAG_PM_RUNTIME | + DL_FLAG_STATELESS | DL_FLAG_RPM_ACTIVE); + if (!pfdev->pm_domain_links[i]) { + dev_err(pfdev->pm_domain_devs[i], + "adding device link failed!\n"); + err = -ENODEV; + goto err; + } + } + + return 0; + +err: + panfrost_pm_domain_fini(pfdev); + return err; } int panfrost_device_init(struct panfrost_device *pfdev) { int err; - struct resource *res; mutex_init(&pfdev->sched_lock); - mutex_init(&pfdev->reset_lock); INIT_LIST_HEAD(&pfdev->scheduled_jobs); + INIT_LIST_HEAD(&pfdev->as_lru_list); - spin_lock_init(&pfdev->hwaccess_lock); + spin_lock_init(&pfdev->as_lock); err = panfrost_clk_init(pfdev); if (err) { @@ -132,62 +211,68 @@ int panfrost_device_init(struct panfrost_device *pfdev) return err; } - err = panfrost_regulator_init(pfdev); + err = panfrost_devfreq_init(pfdev); if (err) { - dev_err(pfdev->dev, "regulator init failed %d\n", err); - goto err_out0; + if (err != -EPROBE_DEFER) + dev_err(pfdev->dev, "devfreq init failed %d\n", err); + goto out_clk; + } + + /* OPP will handle regulators */ + if (!pfdev->pfdevfreq.opp_of_table_added) { + err = panfrost_regulator_init(pfdev); + if (err) + goto out_devfreq; } err = panfrost_reset_init(pfdev); if (err) { dev_err(pfdev->dev, "reset init failed %d\n", err); - goto err_out1; + goto out_regulator; } - res = platform_get_resource(pfdev->pdev, IORESOURCE_MEM, 0); - pfdev->iomem = devm_ioremap_resource(pfdev->dev, res); + err = panfrost_pm_domain_init(pfdev); + if (err) + goto out_reset; + + pfdev->iomem = devm_platform_ioremap_resource(pfdev->pdev, 0); if (IS_ERR(pfdev->iomem)) { - dev_err(pfdev->dev, "failed to ioremap iomem\n"); err = PTR_ERR(pfdev->iomem); 
- goto err_out2; + goto out_pm_domain; } err = panfrost_gpu_init(pfdev); if (err) - goto err_out2; + goto out_pm_domain; err = panfrost_mmu_init(pfdev); if (err) - goto err_out3; + goto out_gpu; err = panfrost_job_init(pfdev); if (err) - goto err_out4; - - /* runtime PM will wake us up later */ - panfrost_gpu_power_off(pfdev); - - pm_runtime_set_active(pfdev->dev); - pm_runtime_get_sync(pfdev->dev); - pm_runtime_mark_last_busy(pfdev->dev); - pm_runtime_put_autosuspend(pfdev->dev); + goto out_mmu; err = panfrost_perfcnt_init(pfdev); if (err) - goto err_out5; + goto out_job; return 0; -err_out5: +out_job: panfrost_job_fini(pfdev); -err_out4: +out_mmu: panfrost_mmu_fini(pfdev); -err_out3: +out_gpu: panfrost_gpu_fini(pfdev); -err_out2: +out_pm_domain: + panfrost_pm_domain_fini(pfdev); +out_reset: panfrost_reset_fini(pfdev); -err_out1: +out_regulator: panfrost_regulator_fini(pfdev); -err_out0: +out_devfreq: + panfrost_devfreq_fini(pfdev); +out_clk: panfrost_clk_fini(pfdev); return err; } @@ -198,74 +283,124 @@ void panfrost_device_fini(struct panfrost_device *pfdev) panfrost_job_fini(pfdev); panfrost_mmu_fini(pfdev); panfrost_gpu_fini(pfdev); + panfrost_pm_domain_fini(pfdev); panfrost_reset_fini(pfdev); + panfrost_devfreq_fini(pfdev); panfrost_regulator_fini(pfdev); panfrost_clk_fini(pfdev); } -const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code) -{ - switch (exception_code) { - /* Non-Fault Status code */ - case 0x00: return "NOT_STARTED/IDLE/OK"; - case 0x01: return "DONE"; - case 0x02: return "INTERRUPTED"; - case 0x03: return "STOPPED"; - case 0x04: return "TERMINATED"; - case 0x08: return "ACTIVE"; - /* Job exceptions */ - case 0x40: return "JOB_CONFIG_FAULT"; - case 0x41: return "JOB_POWER_FAULT"; - case 0x42: return "JOB_READ_FAULT"; - case 0x43: return "JOB_WRITE_FAULT"; - case 0x44: return "JOB_AFFINITY_FAULT"; - case 0x48: return "JOB_BUS_FAULT"; - case 0x50: return "INSTR_INVALID_PC"; - case 0x51: return "INSTR_INVALID_ENC"; - case 0x52: return "INSTR_TYPE_MISMATCH"; - case 0x53: return "INSTR_OPERAND_FAULT"; - case 0x54: return "INSTR_TLS_FAULT"; - case 0x55: return "INSTR_BARRIER_FAULT"; - case 0x56: return "INSTR_ALIGN_FAULT"; - case 0x58: return "DATA_INVALID_FAULT"; - case 0x59: return "TILE_RANGE_FAULT"; - case 0x5A: return "ADDR_RANGE_FAULT"; - case 0x60: return "OUT_OF_MEMORY"; - /* GPU exceptions */ - case 0x80: return "DELAYED_BUS_FAULT"; - case 0x88: return "SHAREABILITY_FAULT"; - /* MMU exceptions */ - case 0xC1: return "TRANSLATION_FAULT_LEVEL1"; - case 0xC2: return "TRANSLATION_FAULT_LEVEL2"; - case 0xC3: return "TRANSLATION_FAULT_LEVEL3"; - case 0xC4: return "TRANSLATION_FAULT_LEVEL4"; - case 0xC8: return "PERMISSION_FAULT"; - case 0xC9 ... 0xCF: return "PERMISSION_FAULT"; - case 0xD1: return "TRANSTAB_BUS_FAULT_LEVEL1"; - case 0xD2: return "TRANSTAB_BUS_FAULT_LEVEL2"; - case 0xD3: return "TRANSTAB_BUS_FAULT_LEVEL3"; - case 0xD4: return "TRANSTAB_BUS_FAULT_LEVEL4"; - case 0xD8: return "ACCESS_FLAG"; - case 0xD9 ... 0xDF: return "ACCESS_FLAG"; - case 0xE0 ... 0xE7: return "ADDRESS_SIZE_FAULT"; - case 0xE8 ... 
0xEF: return "MEMORY_ATTRIBUTES_FAULT"; +#define PANFROST_EXCEPTION(id) \ + [DRM_PANFROST_EXCEPTION_ ## id] = { \ + .name = #id, \ } - return "UNKNOWN"; +struct panfrost_exception_info { + const char *name; +}; + +static const struct panfrost_exception_info panfrost_exception_infos[] = { + PANFROST_EXCEPTION(OK), + PANFROST_EXCEPTION(DONE), + PANFROST_EXCEPTION(INTERRUPTED), + PANFROST_EXCEPTION(STOPPED), + PANFROST_EXCEPTION(TERMINATED), + PANFROST_EXCEPTION(KABOOM), + PANFROST_EXCEPTION(EUREKA), + PANFROST_EXCEPTION(ACTIVE), + PANFROST_EXCEPTION(JOB_CONFIG_FAULT), + PANFROST_EXCEPTION(JOB_POWER_FAULT), + PANFROST_EXCEPTION(JOB_READ_FAULT), + PANFROST_EXCEPTION(JOB_WRITE_FAULT), + PANFROST_EXCEPTION(JOB_AFFINITY_FAULT), + PANFROST_EXCEPTION(JOB_BUS_FAULT), + PANFROST_EXCEPTION(INSTR_INVALID_PC), + PANFROST_EXCEPTION(INSTR_INVALID_ENC), + PANFROST_EXCEPTION(INSTR_TYPE_MISMATCH), + PANFROST_EXCEPTION(INSTR_OPERAND_FAULT), + PANFROST_EXCEPTION(INSTR_TLS_FAULT), + PANFROST_EXCEPTION(INSTR_BARRIER_FAULT), + PANFROST_EXCEPTION(INSTR_ALIGN_FAULT), + PANFROST_EXCEPTION(DATA_INVALID_FAULT), + PANFROST_EXCEPTION(TILE_RANGE_FAULT), + PANFROST_EXCEPTION(ADDR_RANGE_FAULT), + PANFROST_EXCEPTION(IMPRECISE_FAULT), + PANFROST_EXCEPTION(OOM), + PANFROST_EXCEPTION(OOM_AFBC), + PANFROST_EXCEPTION(UNKNOWN), + PANFROST_EXCEPTION(DELAYED_BUS_FAULT), + PANFROST_EXCEPTION(GPU_SHAREABILITY_FAULT), + PANFROST_EXCEPTION(SYS_SHAREABILITY_FAULT), + PANFROST_EXCEPTION(GPU_CACHEABILITY_FAULT), + PANFROST_EXCEPTION(TRANSLATION_FAULT_0), + PANFROST_EXCEPTION(TRANSLATION_FAULT_1), + PANFROST_EXCEPTION(TRANSLATION_FAULT_2), + PANFROST_EXCEPTION(TRANSLATION_FAULT_3), + PANFROST_EXCEPTION(TRANSLATION_FAULT_4), + PANFROST_EXCEPTION(TRANSLATION_FAULT_IDENTITY), + PANFROST_EXCEPTION(PERM_FAULT_0), + PANFROST_EXCEPTION(PERM_FAULT_1), + PANFROST_EXCEPTION(PERM_FAULT_2), + PANFROST_EXCEPTION(PERM_FAULT_3), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_0), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_1), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_2), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_3), + PANFROST_EXCEPTION(ACCESS_FLAG_0), + PANFROST_EXCEPTION(ACCESS_FLAG_1), + PANFROST_EXCEPTION(ACCESS_FLAG_2), + PANFROST_EXCEPTION(ACCESS_FLAG_3), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN0), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN1), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN2), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN3), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT0), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT1), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT2), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT3), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_0), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_1), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_2), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_3), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_0), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_1), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_2), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_3), +}; + +const char *panfrost_exception_name(u32 exception_code) +{ + if (WARN_ON(exception_code >= ARRAY_SIZE(panfrost_exception_infos) || + !panfrost_exception_infos[exception_code].name)) + return "Unknown exception type"; + + return panfrost_exception_infos[exception_code].name; } -#ifdef CONFIG_PM -int panfrost_device_resume(struct device *dev) +bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev, + u32 exception_code) { - struct platform_device *pdev = to_platform_device(dev); - struct panfrost_device *pfdev = platform_get_drvdata(pdev); + /* Right now, none of the GPU we support need a reset, but this + * might change. 
+ */ + return false; +} +void panfrost_device_reset(struct panfrost_device *pfdev) +{ panfrost_gpu_soft_reset(pfdev); - /* TODO: Re-enable all other address spaces */ panfrost_gpu_power_on(pfdev); - panfrost_mmu_enable(pfdev, 0); + panfrost_mmu_reset(pfdev); panfrost_job_enable_interrupts(pfdev); +} + +#ifdef CONFIG_PM +int panfrost_device_resume(struct device *dev) +{ + struct panfrost_device *pfdev = dev_get_drvdata(dev); + + panfrost_device_reset(pfdev); panfrost_devfreq_resume(pfdev); return 0; @@ -273,8 +408,7 @@ int panfrost_device_resume(struct device *dev) int panfrost_device_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct panfrost_device *pfdev = platform_get_drvdata(pdev); + struct panfrost_device *pfdev = dev_get_drvdata(dev); if (!panfrost_job_is_idle(pfdev)) return -EBUSY; diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index 83cc01cafde1..8b25278f34c8 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -5,11 +5,16 @@ #ifndef __PANFROST_DEVICE_H__ #define __PANFROST_DEVICE_H__ +#include <linux/atomic.h> +#include <linux/io-pgtable.h> +#include <linux/regulator/consumer.h> #include <linux/spinlock.h> #include <drm/drm_device.h> #include <drm/drm_mm.h> #include <drm/gpu_scheduler.h> +#include "panfrost_devfreq.h" + struct panfrost_device; struct panfrost_mmu; struct panfrost_job_slot; @@ -17,6 +22,7 @@ struct panfrost_job; struct panfrost_perfcnt; #define NUM_JOB_SLOTS 3 +#define MAX_PM_DOMAINS 3 struct panfrost_features { u16 id; @@ -39,20 +45,35 @@ struct panfrost_features { u32 thread_max_workgroup_sz; u32 thread_max_barrier_sz; u32 coherency_features; + u32 afbc_features; u32 texture_features[4]; u32 js_features[16]; u32 nr_core_groups; + u32 thread_tls_alloc; unsigned long hw_features[64 / BITS_PER_LONG]; unsigned long hw_issues[64 / BITS_PER_LONG]; }; -struct panfrost_devfreq_slot { - ktime_t busy_time; - ktime_t idle_time; - ktime_t time_last_update; - bool busy; +/* + * Features that cannot be automatically detected and need matching using the + * compatible string, typically SoC-specific. + */ +struct panfrost_compatible { + /* Supplies count and names. */ + int num_supplies; + const char * const *supply_names; + /* + * Number of power domains required, note that values 0 and 1 are + * handled identically, as only values > 1 need special handling. + */ + int num_pm_domains; + /* Only required if num_pm_domains > 1. */ + const char * const *pm_domain_names; + + /* Vendor implementation quirks callback */ + void (*vendor_quirk)(struct panfrost_device *pfdev); }; struct panfrost_device { @@ -60,43 +81,65 @@ struct panfrost_device { struct drm_device *ddev; struct platform_device *pdev; - spinlock_t hwaccess_lock; - - struct drm_mm mm; - spinlock_t mm_lock; - void __iomem *iomem; struct clk *clock; struct clk *bus_clock; - struct regulator *regulator; + struct regulator_bulk_data *regulators; struct reset_control *rstc; + /* pm_domains for devices with more than one. 
*/ + struct device *pm_domain_devs[MAX_PM_DOMAINS]; + struct device_link *pm_domain_links[MAX_PM_DOMAINS]; + bool coherent; struct panfrost_features features; + const struct panfrost_compatible *comp; + + spinlock_t as_lock; + unsigned long as_in_use_mask; + unsigned long as_alloc_mask; + unsigned long as_faulty_mask; + struct list_head as_lru_list; - struct panfrost_mmu *mmu; struct panfrost_job_slot *js; - struct panfrost_job *jobs[NUM_JOB_SLOTS]; + struct panfrost_job *jobs[NUM_JOB_SLOTS][2]; struct list_head scheduled_jobs; struct panfrost_perfcnt *perfcnt; struct mutex sched_lock; - struct mutex reset_lock; struct { - struct devfreq *devfreq; - struct thermal_cooling_device *cooling; - unsigned long cur_freq; - unsigned long cur_volt; - struct panfrost_devfreq_slot slot[NUM_JOB_SLOTS]; - } devfreq; + struct workqueue_struct *wq; + struct work_struct work; + atomic_t pending; + } reset; + + struct mutex shrinker_lock; + struct list_head shrinker_list; + struct shrinker shrinker; + + struct panfrost_devfreq pfdevfreq; +}; + +struct panfrost_mmu { + struct panfrost_device *pfdev; + struct kref refcount; + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable_ops *pgtbl_ops; + struct drm_mm mm; + spinlock_t mm_lock; + int as; + atomic_t as_count; + struct list_head list; }; struct panfrost_file_priv { struct panfrost_device *pfdev; struct drm_sched_entity sched_entity[NUM_JOB_SLOTS]; + + struct panfrost_mmu *mmu; }; static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev) @@ -127,10 +170,96 @@ int panfrost_unstable_ioctl_check(void); int panfrost_device_init(struct panfrost_device *pfdev); void panfrost_device_fini(struct panfrost_device *pfdev); +void panfrost_device_reset(struct panfrost_device *pfdev); int panfrost_device_resume(struct device *dev); int panfrost_device_suspend(struct device *dev); -const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code); +enum drm_panfrost_exception_type { + DRM_PANFROST_EXCEPTION_OK = 0x00, + DRM_PANFROST_EXCEPTION_DONE = 0x01, + DRM_PANFROST_EXCEPTION_INTERRUPTED = 0x02, + DRM_PANFROST_EXCEPTION_STOPPED = 0x03, + DRM_PANFROST_EXCEPTION_TERMINATED = 0x04, + DRM_PANFROST_EXCEPTION_KABOOM = 0x05, + DRM_PANFROST_EXCEPTION_EUREKA = 0x06, + DRM_PANFROST_EXCEPTION_ACTIVE = 0x08, + DRM_PANFROST_EXCEPTION_MAX_NON_FAULT = 0x3f, + DRM_PANFROST_EXCEPTION_JOB_CONFIG_FAULT = 0x40, + DRM_PANFROST_EXCEPTION_JOB_POWER_FAULT = 0x41, + DRM_PANFROST_EXCEPTION_JOB_READ_FAULT = 0x42, + DRM_PANFROST_EXCEPTION_JOB_WRITE_FAULT = 0x43, + DRM_PANFROST_EXCEPTION_JOB_AFFINITY_FAULT = 0x44, + DRM_PANFROST_EXCEPTION_JOB_BUS_FAULT = 0x48, + DRM_PANFROST_EXCEPTION_INSTR_INVALID_PC = 0x50, + DRM_PANFROST_EXCEPTION_INSTR_INVALID_ENC = 0x51, + DRM_PANFROST_EXCEPTION_INSTR_TYPE_MISMATCH = 0x52, + DRM_PANFROST_EXCEPTION_INSTR_OPERAND_FAULT = 0x53, + DRM_PANFROST_EXCEPTION_INSTR_TLS_FAULT = 0x54, + DRM_PANFROST_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, + DRM_PANFROST_EXCEPTION_INSTR_ALIGN_FAULT = 0x56, + DRM_PANFROST_EXCEPTION_DATA_INVALID_FAULT = 0x58, + DRM_PANFROST_EXCEPTION_TILE_RANGE_FAULT = 0x59, + DRM_PANFROST_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, + DRM_PANFROST_EXCEPTION_IMPRECISE_FAULT = 0x5b, + DRM_PANFROST_EXCEPTION_OOM = 0x60, + DRM_PANFROST_EXCEPTION_OOM_AFBC = 0x61, + DRM_PANFROST_EXCEPTION_UNKNOWN = 0x7f, + DRM_PANFROST_EXCEPTION_DELAYED_BUS_FAULT = 0x80, + DRM_PANFROST_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, + DRM_PANFROST_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, + DRM_PANFROST_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, + 
DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_IDENTITY = 0xc7, + DRM_PANFROST_EXCEPTION_PERM_FAULT_0 = 0xc8, + DRM_PANFROST_EXCEPTION_PERM_FAULT_1 = 0xc9, + DRM_PANFROST_EXCEPTION_PERM_FAULT_2 = 0xca, + DRM_PANFROST_EXCEPTION_PERM_FAULT_3 = 0xcb, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_0 = 0xd0, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_1 = 0xd1, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_2 = 0xd2, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_3 = 0xd3, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_0 = 0xd8, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_1 = 0xd9, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_2 = 0xda, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_3 = 0xdb, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN0 = 0xe0, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN1 = 0xe1, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN2 = 0xe2, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN3 = 0xe3, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_0 = 0xec, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_1 = 0xed, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_2 = 0xee, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_3 = 0xef, +}; + +static inline bool +panfrost_exception_is_fault(u32 exception_code) +{ + return exception_code > DRM_PANFROST_EXCEPTION_MAX_NON_FAULT; +} + +const char *panfrost_exception_name(u32 exception_code); +bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev, + u32 exception_code); + +static inline void +panfrost_device_schedule_reset(struct panfrost_device *pfdev) +{ + atomic_set(&pfdev->reset.pending, 1); + queue_work(pfdev->reset.wq, &pfdev->reset.work); +} #endif diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 85b4b51b6a0d..087e69b98d06 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -14,7 +14,6 @@ #include <drm/drm_utils.h> #include "panfrost_device.h" -#include "panfrost_devfreq.h" #include "panfrost_gem.h" #include "panfrost_mmu.h" #include "panfrost_job.h" @@ -32,10 +31,43 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct if (param->pad != 0) return -EINVAL; +#define PANFROST_FEATURE(name, member) \ + case DRM_PANFROST_PARAM_ ## name: \ + param->value = pfdev->features.member; \ + break +#define PANFROST_FEATURE_ARRAY(name, member, max) \ + case DRM_PANFROST_PARAM_ ## name ## 0 ... 
\ + DRM_PANFROST_PARAM_ ## name ## max: \ + param->value = pfdev->features.member[param->param - \ + DRM_PANFROST_PARAM_ ## name ## 0]; \ + break + switch (param->param) { - case DRM_PANFROST_PARAM_GPU_PROD_ID: - param->value = pfdev->features.id; - break; + PANFROST_FEATURE(GPU_PROD_ID, id); + PANFROST_FEATURE(GPU_REVISION, revision); + PANFROST_FEATURE(SHADER_PRESENT, shader_present); + PANFROST_FEATURE(TILER_PRESENT, tiler_present); + PANFROST_FEATURE(L2_PRESENT, l2_present); + PANFROST_FEATURE(STACK_PRESENT, stack_present); + PANFROST_FEATURE(AS_PRESENT, as_present); + PANFROST_FEATURE(JS_PRESENT, js_present); + PANFROST_FEATURE(L2_FEATURES, l2_features); + PANFROST_FEATURE(CORE_FEATURES, core_features); + PANFROST_FEATURE(TILER_FEATURES, tiler_features); + PANFROST_FEATURE(MEM_FEATURES, mem_features); + PANFROST_FEATURE(MMU_FEATURES, mmu_features); + PANFROST_FEATURE(THREAD_FEATURES, thread_features); + PANFROST_FEATURE(MAX_THREADS, max_threads); + PANFROST_FEATURE(THREAD_MAX_WORKGROUP_SZ, + thread_max_workgroup_sz); + PANFROST_FEATURE(THREAD_MAX_BARRIER_SZ, + thread_max_barrier_sz); + PANFROST_FEATURE(COHERENCY_FEATURES, coherency_features); + PANFROST_FEATURE(AFBC_FEATURES, afbc_features); + PANFROST_FEATURE_ARRAY(TEXTURE_FEATURES, texture_features, 3); + PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15); + PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups); + PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc); default: return -EINVAL; } @@ -46,29 +78,35 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file) { - int ret; - struct drm_gem_shmem_object *shmem; + struct panfrost_file_priv *priv = file->driver_priv; + struct panfrost_gem_object *bo; struct drm_panfrost_create_bo *args = data; + struct panfrost_gem_mapping *mapping; - if (!args->size || args->flags || args->pad) + if (!args->size || args->pad || + (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) return -EINVAL; - shmem = drm_gem_shmem_create_with_handle(file, dev, args->size, - &args->handle); - if (IS_ERR(shmem)) - return PTR_ERR(shmem); + /* Heaps should never be executable */ + if ((args->flags & PANFROST_BO_HEAP) && + !(args->flags & PANFROST_BO_NOEXEC)) + return -EINVAL; - ret = panfrost_mmu_map(to_panfrost_bo(&shmem->base)); - if (ret) - goto err_free; + bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, + &args->handle); + if (IS_ERR(bo)) + return PTR_ERR(bo); - args->offset = to_panfrost_bo(&shmem->base)->node.start << PAGE_SHIFT; + mapping = panfrost_gem_mapping_get(bo, priv); + if (!mapping) { + drm_gem_object_put(&bo->base.base); + return -EINVAL; + } - return 0; + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); -err_free: - drm_gem_handle_delete(file, args->handle); - return ret; + return 0; } /** @@ -90,24 +128,47 @@ panfrost_lookup_bos(struct drm_device *dev, struct drm_panfrost_submit *args, struct panfrost_job *job) { + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct panfrost_gem_object *bo; + unsigned int i; + int ret; + job->bo_count = args->bo_handle_count; if (!job->bo_count) return 0; - job->implicit_fences = kvmalloc_array(job->bo_count, - sizeof(struct dma_fence *), - GFP_KERNEL | __GFP_ZERO); - if (!job->implicit_fences) + ret = drm_gem_objects_lookup(file_priv, + (void __user *)(uintptr_t)args->bo_handles, + job->bo_count, &job->bos); + if (ret) + return ret; + + job->mappings = 
kvmalloc_array(job->bo_count, + sizeof(struct panfrost_gem_mapping *), + GFP_KERNEL | __GFP_ZERO); + if (!job->mappings) return -ENOMEM; - return drm_gem_objects_lookup(file_priv, - (void __user *)(uintptr_t)args->bo_handles, - job->bo_count, &job->bos); + for (i = 0; i < job->bo_count; i++) { + struct panfrost_gem_mapping *mapping; + + bo = to_panfrost_bo(job->bos[i]); + mapping = panfrost_gem_mapping_get(bo, priv); + if (!mapping) { + ret = -EINVAL; + break; + } + + atomic_inc(&bo->gpu_usecount); + job->mappings[i] = mapping; + } + + return ret; } /** - * panfrost_copy_in_sync() - Sets up job->in_fences[] with the sync objects + * panfrost_copy_in_sync() - Sets up job->deps with the sync objects * referenced by the job. * @dev: DRM device * @file_priv: DRM file for this fd @@ -127,22 +188,14 @@ panfrost_copy_in_sync(struct drm_device *dev, { u32 *handles; int ret = 0; - int i; + int i, in_fence_count; - job->in_fence_count = args->in_sync_count; + in_fence_count = args->in_sync_count; - if (!job->in_fence_count) + if (!in_fence_count) return 0; - job->in_fences = kvmalloc_array(job->in_fence_count, - sizeof(struct dma_fence *), - GFP_KERNEL | __GFP_ZERO); - if (!job->in_fences) { - DRM_DEBUG("Failed to allocate job in fences\n"); - return -ENOMEM; - } - - handles = kvmalloc_array(job->in_fence_count, sizeof(u32), GFP_KERNEL); + handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); if (!handles) { ret = -ENOMEM; DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); @@ -151,16 +204,23 @@ panfrost_copy_in_sync(struct drm_device *dev, if (copy_from_user(handles, (void __user *)(uintptr_t)args->in_syncs, - job->in_fence_count * sizeof(u32))) { + in_fence_count * sizeof(u32))) { ret = -EFAULT; DRM_DEBUG("Failed to copy in syncobj handles\n"); goto fail; } - for (i = 0; i < job->in_fence_count; i++) { + for (i = 0; i < in_fence_count; i++) { + struct dma_fence *fence; + ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, - &job->in_fences[i]); - if (ret == -EINVAL) + &fence); + if (ret) + goto fail; + + ret = drm_sched_job_add_dependency(&job->base, fence); + + if (ret) goto fail; } @@ -173,10 +233,11 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) { struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_file_priv *file_priv = file->driver_priv; struct drm_panfrost_submit *args = data; struct drm_syncobj *sync_out = NULL; struct panfrost_job *job; - int ret = 0; + int ret = 0, slot; if (!args->jc) return -EINVAL; @@ -193,7 +254,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, job = kzalloc(sizeof(*job), GFP_KERNEL); if (!job) { ret = -ENOMEM; - goto fail_out_sync; + goto out_put_syncout; } kref_init(&job->refcount); @@ -202,27 +263,38 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, job->jc = args->jc; job->requirements = args->requirements; job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); - job->file_priv = file->driver_priv; + job->mmu = file_priv->mmu; + + slot = panfrost_job_get_slot(job); + + ret = drm_sched_job_init(&job->base, + &file_priv->sched_entity[slot], + NULL); + if (ret) + goto out_put_job; ret = panfrost_copy_in_sync(dev, file, args, job); if (ret) - goto fail_job; + goto out_cleanup_job; ret = panfrost_lookup_bos(dev, file, args, job); if (ret) - goto fail_job; + goto out_cleanup_job; ret = panfrost_job_push(job); if (ret) - goto fail_job; + goto out_cleanup_job; /* Update the return sync object for the job */ if (sync_out) 
drm_syncobj_replace_fence(sync_out, job->render_done_fence); -fail_job: +out_cleanup_job: + if (ret) + drm_sched_job_cleanup(&job->base); +out_put_job: panfrost_job_put(job); -fail_out_sync: +out_put_syncout: if (sync_out) drm_syncobj_put(sync_out); @@ -245,12 +317,12 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, if (!gem_obj) return -ENOENT; - ret = reservation_object_wait_timeout_rcu(gem_obj->resv, true, - true, timeout); + ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ, + true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY; - drm_gem_object_put_unlocked(gem_obj); + drm_gem_object_put(gem_obj); return ret; } @@ -273,18 +345,27 @@ static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data, return -ENOENT; } + /* Don't allow mmapping of heap objects as pages are not pinned. */ + if (to_panfrost_bo(gem_obj)->is_heap) { + ret = -EINVAL; + goto out; + } + ret = drm_gem_create_mmap_offset(gem_obj); if (ret == 0) args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); - drm_gem_object_put_unlocked(gem_obj); +out: + drm_gem_object_put(gem_obj); return ret; } static int panfrost_ioctl_get_bo_offset(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct panfrost_file_priv *priv = file_priv->driver_priv; struct drm_panfrost_get_bo_offset *args = data; + struct panfrost_gem_mapping *mapping; struct drm_gem_object *gem_obj; struct panfrost_gem_object *bo; @@ -295,12 +376,77 @@ static int panfrost_ioctl_get_bo_offset(struct drm_device *dev, void *data, } bo = to_panfrost_bo(gem_obj); - args->offset = bo->node.start << PAGE_SHIFT; + mapping = panfrost_gem_mapping_get(bo, priv); + drm_gem_object_put(gem_obj); + + if (!mapping) + return -EINVAL; - drm_gem_object_put_unlocked(gem_obj); + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); return 0; } +static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct drm_panfrost_madvise *args = data; + struct panfrost_device *pfdev = dev->dev_private; + struct drm_gem_object *gem_obj; + struct panfrost_gem_object *bo; + int ret = 0; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_panfrost_bo(gem_obj); + + mutex_lock(&pfdev->shrinker_lock); + mutex_lock(&bo->mappings.lock); + if (args->madv == PANFROST_MADV_DONTNEED) { + struct panfrost_gem_mapping *first; + + first = list_first_entry(&bo->mappings.list, + struct panfrost_gem_mapping, + node); + + /* + * If we want to mark the BO purgeable, there must be only one + * user: the caller FD. + * We could do something smarter and mark the BO purgeable only + * when all its users have marked it purgeable, but globally + * visible/shared BOs are likely to never be marked purgeable + * anyway, so let's not bother. 
+ */ + if (!list_is_singular(&bo->mappings.list) || + WARN_ON_ONCE(first->mmu != priv->mmu)) { + ret = -EINVAL; + goto out_unlock_mappings; + } + } + + args->retained = drm_gem_shmem_madvise(&bo->base, args->madv); + + if (args->retained) { + if (args->madv == PANFROST_MADV_DONTNEED) + list_add_tail(&bo->base.madv_list, + &pfdev->shrinker_list); + else if (args->madv == PANFROST_MADV_WILLNEED) + list_del_init(&bo->base.madv_list); + } + +out_unlock_mappings: + mutex_unlock(&bo->mappings.lock); + mutex_unlock(&pfdev->shrinker_lock); + + drm_gem_object_put(gem_obj); + return ret; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -312,6 +458,7 @@ int panfrost_unstable_ioctl_check(void) static int panfrost_open(struct drm_device *dev, struct drm_file *file) { + int ret; struct panfrost_device *pfdev = dev->dev_private; struct panfrost_file_priv *panfrost_priv; @@ -322,7 +469,23 @@ panfrost_open(struct drm_device *dev, struct drm_file *file) panfrost_priv->pfdev = pfdev; file->driver_priv = panfrost_priv; - return panfrost_job_open(panfrost_priv); + panfrost_priv->mmu = panfrost_mmu_ctx_create(pfdev); + if (IS_ERR(panfrost_priv->mmu)) { + ret = PTR_ERR(panfrost_priv->mmu); + goto err_free; + } + + ret = panfrost_job_open(panfrost_priv); + if (ret) + goto err_job; + + return 0; + +err_job: + panfrost_mmu_ctx_put(panfrost_priv->mmu); +err_free: + kfree(panfrost_priv); + return ret; } static void @@ -330,21 +493,18 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file) { struct panfrost_file_priv *panfrost_priv = file->driver_priv; - panfrost_perfcnt_close(panfrost_priv); + panfrost_perfcnt_close(file); panfrost_job_close(panfrost_priv); + panfrost_mmu_ctx_put(panfrost_priv->mmu); kfree(panfrost_priv); } -/* DRM_AUTH is required on SUBMIT for now, while all clients share a single - * address space. Note that render nodes would be able to submit jobs that - * could access BOs from clients authenticated with the master node. 
- */ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { #define PANFROST_IOCTL(n, func, flags) \ DRM_IOCTL_DEF_DRV(PANFROST_##n, panfrost_ioctl_##func, flags) - PANFROST_IOCTL(SUBMIT, submit, DRM_RENDER_ALLOW | DRM_AUTH), + PANFROST_IOCTL(SUBMIT, submit, DRM_RENDER_ALLOW), PANFROST_IOCTL(WAIT_BO, wait_bo, DRM_RENDER_ALLOW), PANFROST_IOCTL(CREATE_BO, create_bo, DRM_RENDER_ALLOW), PANFROST_IOCTL(MMAP_BO, mmap_bo, DRM_RENDER_ALLOW), @@ -352,13 +512,19 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(GET_BO_OFFSET, get_bo_offset, DRM_RENDER_ALLOW), PANFROST_IOCTL(PERFCNT_ENABLE, perfcnt_enable, DRM_RENDER_ALLOW), PANFROST_IOCTL(PERFCNT_DUMP, perfcnt_dump, DRM_RENDER_ALLOW), + PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), }; -DEFINE_DRM_GEM_SHMEM_FOPS(panfrost_drm_driver_fops); +DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); -static struct drm_driver panfrost_drm_driver = { - .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_PRIME | - DRIVER_SYNCOBJ, +/* + * Panfrost driver version: + * - 1.0 - initial interface + * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO + * - 1.2 - adds AFBC_FEATURES query + */ +static const struct drm_driver panfrost_drm_driver = { + .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ, .open = panfrost_open, .postclose = panfrost_postclose, .ioctls = panfrost_drm_driver_ioctls, @@ -368,7 +534,7 @@ static struct drm_driver panfrost_drm_driver = { .desc = "panfrost DRM", .date = "20180908", .major = 1, - .minor = 0, + .minor = 2, .gem_create_object = panfrost_gem_create_object, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, @@ -392,7 +558,13 @@ static int panfrost_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pfdev); - /* Allocate and initialze the DRM device. */ + pfdev->comp = of_device_get_match_data(&pdev->dev); + if (!pfdev->comp) + return -ENODEV; + + pfdev->coherent = device_get_dma_attr(&pdev->dev) == DEV_DMA_COHERENT; + + /* Allocate and initialize the DRM device. */ ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev); if (IS_ERR(ddev)) return PTR_ERR(ddev); @@ -400,14 +572,8 @@ static int panfrost_probe(struct platform_device *pdev) ddev->dev_private = pfdev; pfdev->ddev = ddev; - spin_lock_init(&pfdev->mm_lock); - - /* 4G enough for now. 
can be 48-bit */ - drm_mm_init(&pfdev->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); - - pm_runtime_use_autosuspend(pfdev->dev); - pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */ - pm_runtime_enable(pfdev->dev); + mutex_init(&pfdev->shrinker_lock); + INIT_LIST_HEAD(&pfdev->shrinker_list); err = panfrost_device_init(pfdev); if (err) { @@ -416,12 +582,11 @@ static int panfrost_probe(struct platform_device *pdev) goto err_out0; } - err = panfrost_devfreq_init(pfdev); - if (err) { - if (err != -EPROBE_DEFER) - dev_err(&pdev->dev, "Fatal error during devfreq init\n"); - goto err_out1; - } + pm_runtime_set_active(pfdev->dev); + pm_runtime_mark_last_busy(pfdev->dev); + pm_runtime_enable(pfdev->dev); + pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */ + pm_runtime_use_autosuspend(pfdev->dev); /* * Register the DRM device with the core and the connectors with @@ -431,12 +596,15 @@ static int panfrost_probe(struct platform_device *pdev) if (err < 0) goto err_out1; + panfrost_gem_shrinker_init(ddev); + return 0; err_out1: + pm_runtime_disable(pfdev->dev); panfrost_device_fini(pfdev); + pm_runtime_set_suspended(pfdev->dev); err_out0: - pm_runtime_disable(pfdev->dev); drm_dev_put(ddev); return err; } @@ -447,24 +615,57 @@ static int panfrost_remove(struct platform_device *pdev) struct drm_device *ddev = pfdev->ddev; drm_dev_unregister(ddev); + panfrost_gem_shrinker_cleanup(ddev); + pm_runtime_get_sync(pfdev->dev); - pm_runtime_put_sync_autosuspend(pfdev->dev); pm_runtime_disable(pfdev->dev); panfrost_device_fini(pfdev); + pm_runtime_set_suspended(pfdev->dev); + drm_dev_put(ddev); return 0; } +static const char * const default_supplies[] = { "mali" }; +static const struct panfrost_compatible default_data = { + .num_supplies = ARRAY_SIZE(default_supplies), + .supply_names = default_supplies, + .num_pm_domains = 1, /* optional */ + .pm_domain_names = NULL, +}; + +static const struct panfrost_compatible amlogic_data = { + .num_supplies = ARRAY_SIZE(default_supplies), + .supply_names = default_supplies, + .vendor_quirk = panfrost_gpu_amlogic_quirk, +}; + +static const char * const mediatek_mt8183_supplies[] = { "mali", "sram" }; +static const char * const mediatek_mt8183_pm_domains[] = { "core0", "core1", "core2" }; +static const struct panfrost_compatible mediatek_mt8183_data = { + .num_supplies = ARRAY_SIZE(mediatek_mt8183_supplies), + .supply_names = mediatek_mt8183_supplies, + .num_pm_domains = ARRAY_SIZE(mediatek_mt8183_pm_domains), + .pm_domain_names = mediatek_mt8183_pm_domains, +}; + static const struct of_device_id dt_match[] = { - { .compatible = "arm,mali-t604" }, - { .compatible = "arm,mali-t624" }, - { .compatible = "arm,mali-t628" }, - { .compatible = "arm,mali-t720" }, - { .compatible = "arm,mali-t760" }, - { .compatible = "arm,mali-t820" }, - { .compatible = "arm,mali-t830" }, - { .compatible = "arm,mali-t860" }, - { .compatible = "arm,mali-t880" }, + /* Set first to probe before the generic compatibles */ + { .compatible = "amlogic,meson-gxm-mali", + .data = &amlogic_data, }, + { .compatible = "amlogic,meson-g12a-mali", + .data = &amlogic_data, }, + { .compatible = "arm,mali-t604", .data = &default_data, }, + { .compatible = "arm,mali-t624", .data = &default_data, }, + { .compatible = "arm,mali-t628", .data = &default_data, }, + { .compatible = "arm,mali-t720", .data = &default_data, }, + { .compatible = "arm,mali-t760", .data = &default_data, }, + { .compatible = "arm,mali-t820", .data = &default_data, }, + { .compatible = "arm,mali-t830", 
.data = &default_data, }, + { .compatible = "arm,mali-t860", .data = &default_data, }, + { .compatible = "arm,mali-t880", .data = &default_data, }, + { .compatible = "arm,mali-bifrost", .data = &default_data, }, + { .compatible = "mediatek,mt8183-mali", .data = &mediatek_mt8183_data }, {} }; MODULE_DEVICE_TABLE(of, dt_match); diff --git a/drivers/gpu/drm/panfrost/panfrost_features.h b/drivers/gpu/drm/panfrost/panfrost_features.h index 5056777c7744..36fadcf9634e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_features.h +++ b/drivers/gpu/drm/panfrost/panfrost_features.h @@ -12,24 +12,6 @@ enum panfrost_hw_feature { HW_FEATURE_JOBCHAIN_DISAMBIGUATION, HW_FEATURE_PWRON_DURING_PWROFF_TRANS, HW_FEATURE_XAFFINITY, - HW_FEATURE_OUT_OF_ORDER_EXEC, - HW_FEATURE_MRT, - HW_FEATURE_BRNDOUT_CC, - HW_FEATURE_INTERPIPE_REG_ALIASING, - HW_FEATURE_LD_ST_TILEBUFFER, - HW_FEATURE_MSAA_16X, - HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - HW_FEATURE_OPTIMIZED_COVERAGE_MASK, - HW_FEATURE_T7XX_PAIRING_RULES, - HW_FEATURE_LD_ST_LEA_TEX, - HW_FEATURE_LINEAR_FILTER_FLOAT, - HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, - HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, - HW_FEATURE_TEST4_DATUM_MODE, - HW_FEATURE_NEXT_INSTRUCTION_TYPE, - HW_FEATURE_BRNDOUT_KILL, - HW_FEATURE_WARPING, HW_FEATURE_V4, HW_FEATURE_FLUSH_REDUCTION, HW_FEATURE_PROTECTED_MODE, @@ -38,132 +20,36 @@ enum panfrost_hw_feature { HW_FEATURE_AARCH64_MMU, HW_FEATURE_TLS_HASHING, HW_FEATURE_THREAD_GROUP_SPLIT, + HW_FEATURE_IDVS_GROUP_SIZE, HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG, }; #define hw_features_t600 (\ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_V4)) -#define hw_features_t620 (\ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ - BIT_ULL(HW_FEATURE_V4)) - -#define hw_features_t720 (\ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_OPTIMIZED_COVERAGE_MASK) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ - BIT_ULL(HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_V4)) +#define hw_features_t620 hw_features_t600 +#define hw_features_t720 hw_features_t600 #define hw_features_t760 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) -// T860 -#define hw_features_t860 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - 
BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) +#define hw_features_t860 hw_features_t760 -#define hw_features_t880 hw_features_t860 +#define hw_features_t880 hw_features_t760 -#define hw_features_t830 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) +#define hw_features_t830 hw_features_t760 -#define hw_features_t820 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT)) +#define hw_features_t820 hw_features_t760 #define hw_features_g71 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ @@ -173,96 +59,29 @@ enum panfrost_hw_feature { BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - 
BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ BIT_ULL(HW_FEATURE_COHERENCY_REG)) -#define hw_features_g51 (\ - BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ - BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ - BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ - BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ - BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ - BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ - BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ - BIT_ULL(HW_FEATURE_COHERENCY_REG)) +#define hw_features_g51 hw_features_g72 #define hw_features_g52 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ BIT_ULL(HW_FEATURE_PROTECTED_DEBUG_MODE) | \ + BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \ BIT_ULL(HW_FEATURE_COHERENCY_REG)) #define hw_features_g76 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ 
BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ @@ -270,27 +89,13 @@ enum panfrost_hw_feature { BIT_ULL(HW_FEATURE_COHERENCY_REG) | \ BIT_ULL(HW_FEATURE_AARCH64_MMU) | \ BIT_ULL(HW_FEATURE_TLS_HASHING) | \ + BIT_ULL(HW_FEATURE_IDVS_GROUP_SIZE) | \ BIT_ULL(HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) #define hw_features_g31 (\ BIT_ULL(HW_FEATURE_JOBCHAIN_DISAMBIGUATION) | \ BIT_ULL(HW_FEATURE_PWRON_DURING_PWROFF_TRANS) | \ BIT_ULL(HW_FEATURE_XAFFINITY) | \ - BIT_ULL(HW_FEATURE_WARPING) | \ - BIT_ULL(HW_FEATURE_INTERPIPE_REG_ALIASING) | \ - BIT_ULL(HW_FEATURE_32_BIT_UNIFORM_ADDRESS) | \ - BIT_ULL(HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_CC) | \ - BIT_ULL(HW_FEATURE_BRNDOUT_KILL) | \ - BIT_ULL(HW_FEATURE_LD_ST_LEA_TEX) | \ - BIT_ULL(HW_FEATURE_LD_ST_TILEBUFFER) | \ - BIT_ULL(HW_FEATURE_LINEAR_FILTER_FLOAT) | \ - BIT_ULL(HW_FEATURE_MRT) | \ - BIT_ULL(HW_FEATURE_MSAA_16X) | \ - BIT_ULL(HW_FEATURE_NEXT_INSTRUCTION_TYPE) | \ - BIT_ULL(HW_FEATURE_OUT_OF_ORDER_EXEC) | \ - BIT_ULL(HW_FEATURE_T7XX_PAIRING_RULES) | \ - BIT_ULL(HW_FEATURE_TEST4_DATUM_MODE) | \ BIT_ULL(HW_FEATURE_THREAD_GROUP_SPLIT) | \ BIT_ULL(HW_FEATURE_FLUSH_REDUCTION) | \ BIT_ULL(HW_FEATURE_PROTECTED_MODE) | \ diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index b46416be5a54..293e799e2fe8 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -19,24 +19,193 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj) struct panfrost_gem_object *bo = to_panfrost_bo(obj); struct panfrost_device *pfdev = obj->dev->dev_private; - if (bo->is_mapped) - panfrost_mmu_unmap(bo); + /* + * Make sure the BO is no longer inserted in the shrinker list before + * taking care of the destruction itself. If we don't do that we have a + * race condition between this function and what's done in + * panfrost_gem_shrinker_scan(). + */ + mutex_lock(&pfdev->shrinker_lock); + list_del_init(&bo->base.madv_list); + mutex_unlock(&pfdev->shrinker_lock); - spin_lock(&pfdev->mm_lock); - drm_mm_remove_node(&bo->node); - spin_unlock(&pfdev->mm_lock); + /* + * If we still have mappings attached to the BO, there's a problem in + * our refcounting. 
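+ * Each mapping holds its own reference on the BO (taken in + * panfrost_gem_open() and only dropped from + * panfrost_gem_mapping_release()), so a non-empty list here means a + * reference was dropped that was never taken.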
+ */ + WARN_ON_ONCE(!list_empty(&bo->mappings.list)); - drm_gem_shmem_free_object(obj); + if (bo->sgts) { + int i; + int n_sgt = bo->base.base.size / SZ_2M; + + for (i = 0; i < n_sgt; i++) { + if (bo->sgts[i].sgl) { + dma_unmap_sgtable(pfdev->dev, &bo->sgts[i], + DMA_BIDIRECTIONAL, 0); + sg_free_table(&bo->sgts[i]); + } + } + kvfree(bo->sgts); + } + + drm_gem_shmem_free(&bo->base); +} + +struct panfrost_gem_mapping * +panfrost_gem_mapping_get(struct panfrost_gem_object *bo, + struct panfrost_file_priv *priv) +{ + struct panfrost_gem_mapping *iter, *mapping = NULL; + + mutex_lock(&bo->mappings.lock); + list_for_each_entry(iter, &bo->mappings.list, node) { + if (iter->mmu == priv->mmu) { + kref_get(&iter->refcount); + mapping = iter; + break; + } + } + mutex_unlock(&bo->mappings.lock); + + return mapping; +} + +static void +panfrost_gem_teardown_mapping(struct panfrost_gem_mapping *mapping) +{ + if (mapping->active) + panfrost_mmu_unmap(mapping); + + spin_lock(&mapping->mmu->mm_lock); + if (drm_mm_node_allocated(&mapping->mmnode)) + drm_mm_remove_node(&mapping->mmnode); + spin_unlock(&mapping->mmu->mm_lock); +} + +static void panfrost_gem_mapping_release(struct kref *kref) +{ + struct panfrost_gem_mapping *mapping; + + mapping = container_of(kref, struct panfrost_gem_mapping, refcount); + + panfrost_gem_teardown_mapping(mapping); + drm_gem_object_put(&mapping->obj->base.base); + panfrost_mmu_ctx_put(mapping->mmu); + kfree(mapping); +} + +void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping) +{ + if (!mapping) + return; + + kref_put(&mapping->refcount, panfrost_gem_mapping_release); +} + +void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo) +{ + struct panfrost_gem_mapping *mapping; + + list_for_each_entry(mapping, &bo->mappings.list, node) + panfrost_gem_teardown_mapping(mapping); +} + +int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv) +{ + int ret; + size_t size = obj->size; + u64 align; + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + unsigned long color = bo->noexec ? PANFROST_BO_NOEXEC : 0; + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct panfrost_gem_mapping *mapping; + + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) + return -ENOMEM; + + INIT_LIST_HEAD(&mapping->node); + kref_init(&mapping->refcount); + drm_gem_object_get(obj); + mapping->obj = bo; + + /* + * Executable buffers cannot cross a 16MB boundary as the program + * counter is 24-bits. We assume executable buffers will be less than + * 16MB and aligning executable buffers to their size will avoid + * crossing a 16MB boundary. + */ + if (!bo->noexec) + align = size >> PAGE_SHIFT; + else + align = size >= SZ_2M ? 
SZ_2M >> PAGE_SHIFT : 0; + + mapping->mmu = panfrost_mmu_ctx_get(priv->mmu); + spin_lock(&mapping->mmu->mm_lock); + ret = drm_mm_insert_node_generic(&mapping->mmu->mm, &mapping->mmnode, + size >> PAGE_SHIFT, align, color, 0); + spin_unlock(&mapping->mmu->mm_lock); + if (ret) + goto err; + + if (!bo->is_heap) { + ret = panfrost_mmu_map(mapping); + if (ret) + goto err; + } + + mutex_lock(&bo->mappings.lock); + WARN_ON(bo->base.madv != PANFROST_MADV_WILLNEED); + list_add_tail(&mapping->node, &bo->mappings.list); + mutex_unlock(&bo->mappings.lock); + +err: + if (ret) + panfrost_gem_mapping_put(mapping); + return ret; +} + +void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv) +{ + struct panfrost_file_priv *priv = file_priv->driver_priv; + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct panfrost_gem_mapping *mapping = NULL, *iter; + + mutex_lock(&bo->mappings.lock); + list_for_each_entry(iter, &bo->mappings.list, node) { + if (iter->mmu == priv->mmu) { + mapping = iter; + list_del(&iter->node); + break; + } + } + mutex_unlock(&bo->mappings.lock); + + panfrost_gem_mapping_put(mapping); +} + +static int panfrost_gem_pin(struct drm_gem_object *obj) +{ + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + + if (bo->is_heap) + return -EINVAL; + + return drm_gem_shmem_pin(&bo->base); } static const struct drm_gem_object_funcs panfrost_gem_funcs = { .free = panfrost_gem_free_object, - .print_info = drm_gem_shmem_print_info, - .pin = drm_gem_shmem_pin, - .unpin = drm_gem_shmem_unpin, - .get_sg_table = drm_gem_shmem_get_sg_table, - .vmap = drm_gem_shmem_vmap, - .vunmap = drm_gem_shmem_vunmap, + .open = panfrost_gem_open, + .close = panfrost_gem_close, + .print_info = drm_gem_shmem_object_print_info, + .pin = panfrost_gem_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = drm_gem_shmem_object_mmap, .vm_ops = &drm_gem_shmem_vm_ops, }; @@ -50,32 +219,54 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = { */ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size) { - int ret; struct panfrost_device *pfdev = dev->dev_private; struct panfrost_gem_object *obj; - u64 align; obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (!obj) - return NULL; + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&obj->mappings.list); + mutex_init(&obj->mappings.lock); obj->base.base.funcs = &panfrost_gem_funcs; + obj->base.map_wc = !pfdev->coherent; - size = roundup(size, PAGE_SIZE); - align = size >= SZ_2M ? 
SZ_2M >> PAGE_SHIFT : 0; + return &obj->base.base; +} - spin_lock(&pfdev->mm_lock); - ret = drm_mm_insert_node_generic(&pfdev->mm, &obj->node, - size >> PAGE_SHIFT, align, 0, 0); - spin_unlock(&pfdev->mm_lock); - if (ret) - goto free_obj; +struct panfrost_gem_object * +panfrost_gem_create_with_handle(struct drm_file *file_priv, + struct drm_device *dev, size_t size, + u32 flags, + uint32_t *handle) +{ + int ret; + struct drm_gem_shmem_object *shmem; + struct panfrost_gem_object *bo; - return &obj->base.base; + /* Round up heap allocations to 2MB to keep fault handling simple */ + if (flags & PANFROST_BO_HEAP) + size = roundup(size, SZ_2M); + + shmem = drm_gem_shmem_create(dev, size); + if (IS_ERR(shmem)) + return ERR_CAST(shmem); + + bo = to_panfrost_bo(&shmem->base); + bo->noexec = !!(flags & PANFROST_BO_NOEXEC); + bo->is_heap = !!(flags & PANFROST_BO_HEAP); -free_obj: - kfree(obj); - return ERR_PTR(ret); + /* + * Allocate an id in the idr table where the obj is registered; + * the handle is the id that userspace gets to see. + */ + ret = drm_gem_handle_create(file_priv, &shmem->base, handle); + /* drop reference from allocate - handle holds it now. */ + drm_gem_object_put(&shmem->base); + if (ret) + return ERR_PTR(ret); + + return bo; } struct drm_gem_object * @@ -84,17 +275,14 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sgt) { struct drm_gem_object *obj; - struct panfrost_gem_object *pobj; + struct panfrost_gem_object *bo; obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt); if (IS_ERR(obj)) return ERR_CAST(obj); - pobj = to_panfrost_bo(obj); - - obj->resv = attach->dmabuf->resv; - - panfrost_mmu_map(pobj); + bo = to_panfrost_bo(obj); + bo->noexec = true; return obj; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 6dbcaba020fc..8088d5fd8480 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -7,11 +7,46 @@ #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_mm.h> +struct panfrost_mmu; + struct panfrost_gem_object { struct drm_gem_shmem_object base; + struct sg_table *sgts; + + /* + * Use a list for now. If searching a mapping ever becomes the + * bottleneck, we should consider using an RB-tree, or even better, + * let the core store drm_gem_object_mapping entries (where we + * could place driver specific data) instead of drm_gem_object ones + * in its drm_file->object_idr table. + * + * struct drm_gem_object_mapping { + * struct drm_gem_object *obj; + * void *driver_priv; + * }; + */ + struct { + struct list_head list; + struct mutex lock; + } mappings; + + /* + * Count the number of jobs referencing this BO so we don't let the + * shrinker reclaim this object prematurely.
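+ * + * Rough lifecycle sketch (the increment lives on the job submission + * path): + * + * submit: atomic_inc(&bo->gpu_usecount) + * panfrost_job_cleanup(): atomic_dec(&bo->gpu_usecount) + * panfrost_gem_purge(): bails out while atomic_read() != 0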
+ */ + atomic_t gpu_usecount; - struct drm_mm_node node; - bool is_mapped; + bool noexec :1; + bool is_heap :1; +}; + +struct panfrost_gem_mapping { + struct list_head node; + struct kref refcount; + struct panfrost_gem_object *obj; + struct drm_mm_node mmnode; + struct panfrost_mmu *mmu; + bool active :1; }; static inline @@ -20,6 +55,12 @@ struct panfrost_gem_object *to_panfrost_bo(struct drm_gem_object *obj) return container_of(to_drm_gem_shmem_obj(obj), struct panfrost_gem_object, base); } +static inline struct panfrost_gem_mapping * +drm_mm_node_to_panfrost_mapping(struct drm_mm_node *node) +{ + return container_of(node, struct panfrost_gem_mapping, mmnode); +} + struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t size); struct drm_gem_object * @@ -27,4 +68,23 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); +struct panfrost_gem_object * +panfrost_gem_create_with_handle(struct drm_file *file_priv, + struct drm_device *dev, size_t size, + u32 flags, + uint32_t *handle); + +int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); +void panfrost_gem_close(struct drm_gem_object *obj, + struct drm_file *file_priv); + +struct panfrost_gem_mapping * +panfrost_gem_mapping_get(struct panfrost_gem_object *bo, + struct panfrost_file_priv *priv); +void panfrost_gem_mapping_put(struct panfrost_gem_mapping *mapping); +void panfrost_gem_teardown_mappings_locked(struct panfrost_gem_object *bo); + +void panfrost_gem_shrinker_init(struct drm_device *dev); +void panfrost_gem_shrinker_cleanup(struct drm_device *dev); + #endif /* __PANFROST_GEM_H__ */ diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c new file mode 100644 index 000000000000..77e7cb6d1ae3 --- /dev/null +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 Arm Ltd. 
+ * + * Based on msm_gem_freedreno.c: + * Copyright (C) 2016 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + */ + +#include <linux/list.h> + +#include <drm/drm_device.h> +#include <drm/drm_gem_shmem_helper.h> + +#include "panfrost_device.h" +#include "panfrost_gem.h" +#include "panfrost_mmu.h" + +static unsigned long +panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct panfrost_device *pfdev = + container_of(shrinker, struct panfrost_device, shrinker); + struct drm_gem_shmem_object *shmem; + unsigned long count = 0; + + if (!mutex_trylock(&pfdev->shrinker_lock)) + return 0; + + list_for_each_entry(shmem, &pfdev->shrinker_list, madv_list) { + if (drm_gem_shmem_is_purgeable(shmem)) + count += shmem->base.size >> PAGE_SHIFT; + } + + mutex_unlock(&pfdev->shrinker_lock); + + return count; +} + +static bool panfrost_gem_purge(struct drm_gem_object *obj) +{ + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + bool ret = false; + + if (atomic_read(&bo->gpu_usecount)) + return false; + + if (!mutex_trylock(&bo->mappings.lock)) + return false; + + if (!mutex_trylock(&shmem->pages_lock)) + goto unlock_mappings; + + panfrost_gem_teardown_mappings_locked(bo); + drm_gem_shmem_purge_locked(&bo->base); + ret = true; + + mutex_unlock(&shmem->pages_lock); + +unlock_mappings: + mutex_unlock(&bo->mappings.lock); + return ret; +} + +static unsigned long +panfrost_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct panfrost_device *pfdev = + container_of(shrinker, struct panfrost_device, shrinker); + struct drm_gem_shmem_object *shmem, *tmp; + unsigned long freed = 0; + + if (!mutex_trylock(&pfdev->shrinker_lock)) + return SHRINK_STOP; + + list_for_each_entry_safe(shmem, tmp, &pfdev->shrinker_list, madv_list) { + if (freed >= sc->nr_to_scan) + break; + if (drm_gem_shmem_is_purgeable(shmem) && + panfrost_gem_purge(&shmem->base)) { + freed += shmem->base.size >> PAGE_SHIFT; + list_del_init(&shmem->madv_list); + } + } + + mutex_unlock(&pfdev->shrinker_lock); + + if (freed > 0) + pr_info_ratelimited("Purging %lu bytes\n", freed << PAGE_SHIFT); + + return freed; +} + +/** + * panfrost_gem_shrinker_init - Initialize panfrost shrinker + * @dev: DRM device + * + * This function registers and sets up the panfrost shrinker. + */ +void panfrost_gem_shrinker_init(struct drm_device *dev) +{ + struct panfrost_device *pfdev = dev->dev_private; + pfdev->shrinker.count_objects = panfrost_gem_shrinker_count; + pfdev->shrinker.scan_objects = panfrost_gem_shrinker_scan; + pfdev->shrinker.seeks = DEFAULT_SEEKS; + WARN_ON(register_shrinker(&pfdev->shrinker)); +} + +/** + * panfrost_gem_shrinker_cleanup - Clean up panfrost shrinker + * @dev: DRM device + * + * This function unregisters the panfrost shrinker. 
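+ * It is safe to call even if panfrost_gem_shrinker_init() never ran or + * failed: shrinker.nr_deferred is only populated by a successful + * register_shrinker(), and is checked below before unregistering.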
+ */ +void panfrost_gem_shrinker_cleanup(struct drm_device *dev) +{ + struct panfrost_device *pfdev = dev->dev_private; + + if (pfdev->shrinker.nr_deferred) { + unregister_shrinker(&pfdev->shrinker); + } +} diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 20ab333fc925..aa89926742fd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -10,6 +10,7 @@ #include <linux/io.h> #include <linux/iopoll.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include "panfrost_device.h" #include "panfrost_features.h" @@ -32,7 +33,7 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO); dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n", - fault_status & 0xFF, panfrost_exception_name(pfdev, fault_status), + fault_status, panfrost_exception_name(fault_status & 0xFF), address); if (state & GPU_IRQ_MULTIPLE_FAULT) @@ -75,6 +76,17 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev) return 0; } +void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev) +{ + /* + * The Amlogic integrated Mali-T820, Mali-G31 & Mali-G52 need + * these undocumented bits in GPU_PWR_OVERRIDE1 to be set in order + * to operate correctly. + */ + gpu_write(pfdev, GPU_PWR_KEY, GPU_PWR_KEY_UNLOCK); + gpu_write(pfdev, GPU_PWR_OVERRIDE1, 0xfff | (0x20 << 16)); +} + static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev) { u32 quirks = 0; @@ -133,8 +145,15 @@ static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev) quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << JM_FORCE_COHERENCY_FEATURES_SHIFT; + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE)) + quirks |= JM_DEFAULT_IDVS_GROUP_SIZE << JM_IDVS_GROUP_SIZE_SHIFT; + if (quirks) gpu_write(pfdev, GPU_JM_CONFIG, quirks); + + /* Here go the platform-specific quirks */ + if (pfdev->comp->vendor_quirk) + pfdev->comp->vendor_quirk(pfdev); } #define MAX_HW_REVS 6 @@ -208,7 +227,11 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES); pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES); pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES); + pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS); + pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); + pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES); for (i = 0; i < 4; i++) pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); @@ -232,6 +255,8 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.stack_present = gpu_read(pfdev, GPU_STACK_PRESENT_LO); pfdev->features.stack_present |= (u64)gpu_read(pfdev, GPU_STACK_PRESENT_HI) << 32; + pfdev->features.thread_tls_alloc = gpu_read(pfdev, GPU_THREAD_TLS_ALLOC); + gpu_id = gpu_read(pfdev, GPU_ID); pfdev->features.revision = gpu_id & 0xffff; pfdev->features.id = gpu_id >> 16; @@ -298,33 +323,50 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev) { int ret; u32 val; + u64 core_mask = U64_MAX; - /* Just turn on everything for now */ - gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present); - ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO, 
- val, val == pfdev->features.l2_present, 100, 1000); + panfrost_gpu_init_quirks(pfdev); - gpu_write(pfdev, STACK_PWRON_LO, pfdev->features.stack_present); - ret |= readl_relaxed_poll_timeout(pfdev->iomem + STACK_READY_LO, - val, val == pfdev->features.stack_present, 100, 1000); + if (pfdev->features.l2_present != 1) { + /* + * Only support one core group now. + * ~(l2_present - 1) unsets all bits in l2_present except + * the bottom bit. (l2_present - 2) has all the bits in + * the first core group set. AND them together to generate + * a mask of cores in the first core group. + * E.g. a dual core-group GPU with l2_present = 0x11: + * ~(0x11 - 1) & (0x11 - 2) = ~0x10 & 0x0f = 0x0f, i.e. the + * four shader cores of the first group. + */ + core_mask = ~(pfdev->features.l2_present - 1) & + (pfdev->features.l2_present - 2); + dev_info_once(pfdev->dev, "using only 1st core group (%lu cores from %lu)\n", + hweight64(core_mask), + hweight64(pfdev->features.shader_present)); + } + gpu_write(pfdev, L2_PWRON_LO, pfdev->features.l2_present & core_mask); + ret = readl_relaxed_poll_timeout(pfdev->iomem + L2_READY_LO, + val, val == (pfdev->features.l2_present & core_mask), + 100, 20000); + if (ret) + dev_err(pfdev->dev, "error powering up gpu L2"); - gpu_write(pfdev, SHADER_PWRON_LO, pfdev->features.shader_present); - ret |= readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO, - val, val == pfdev->features.shader_present, 100, 1000); + gpu_write(pfdev, SHADER_PWRON_LO, + pfdev->features.shader_present & core_mask); + ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_READY_LO, + val, val == (pfdev->features.shader_present & core_mask), + 100, 20000); + if (ret) + dev_err(pfdev->dev, "error powering up gpu shader"); gpu_write(pfdev, TILER_PWRON_LO, pfdev->features.tiler_present); - ret |= readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO, + ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_READY_LO, val, val == pfdev->features.tiler_present, 100, 1000); - if (ret) - dev_err(pfdev->dev, "error powering up gpu"); + dev_err(pfdev->dev, "error powering up gpu tiler"); } void panfrost_gpu_power_off(struct panfrost_device *pfdev) { gpu_write(pfdev, TILER_PWROFF_LO, 0); gpu_write(pfdev, SHADER_PWROFF_LO, 0); - gpu_write(pfdev, STACK_PWROFF_LO, 0); gpu_write(pfdev, L2_PWROFF_LO, 0); } @@ -338,21 +380,24 @@ int panfrost_gpu_init(struct panfrost_device *pfdev) panfrost_gpu_init_features(pfdev); - dma_set_mask_and_coherent(pfdev->dev, + err = dma_set_mask_and_coherent(pfdev->dev, DMA_BIT_MASK(FIELD_GET(0xff00, pfdev->features.mmu_features))); + if (err) + return err; + + dma_set_max_seg_size(pfdev->dev, UINT_MAX); irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "gpu"); if (irq <= 0) return -ENODEV; err = devm_request_irq(pfdev->dev, irq, panfrost_gpu_irq_handler, - IRQF_SHARED, "gpu", pfdev); + IRQF_SHARED, KBUILD_MODNAME "-gpu", pfdev); if (err) { dev_err(pfdev->dev, "failed to request gpu irq"); return err; } - panfrost_gpu_init_quirks(pfdev); panfrost_gpu_power_on(pfdev); return 0; @@ -365,7 +410,16 @@ void panfrost_gpu_fini(struct panfrost_device *pfdev) u32 panfrost_gpu_get_latest_flush_id(struct panfrost_device *pfdev) { - if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) - return gpu_read(pfdev, GPU_LATEST_FLUSH_ID); + u32 flush_id; + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) { + /* Flush reduction only makes sense when the GPU is kept powered on between jobs */ + if (pm_runtime_get_if_in_use(pfdev->dev)) { + flush_id = gpu_read(pfdev, GPU_LATEST_FLUSH_ID); + pm_runtime_put(pfdev->dev); + return flush_id; + } + } + return 0; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h 
b/drivers/gpu/drm/panfrost/panfrost_gpu.h index 4112412087b2..468c51e7e46d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.h +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h @@ -16,4 +16,6 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev); void panfrost_gpu_power_on(struct panfrost_device *pfdev); void panfrost_gpu_power_off(struct panfrost_device *pfdev); +void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev); + #endif diff --git a/drivers/gpu/drm/panfrost/panfrost_issues.h b/drivers/gpu/drm/panfrost/panfrost_issues.h index cec6dcdadb5c..501a76c5e95f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_issues.h +++ b/drivers/gpu/drm/panfrost/panfrost_issues.h @@ -13,37 +13,118 @@ * to care about. */ enum panfrost_hw_issue { + /* Need way to guarantee that all previously-translated memory accesses + * are committed */ HW_ISSUE_6367, + + /* On job complete with non-done the cache is not flushed */ HW_ISSUE_6787, + + /* Write of PRFCNT_CONFIG_MODE_MANUAL to PRFCNT_CONFIG causes an + * instrumentation dump if PRFCNT_TILER_EN is enabled */ HW_ISSUE_8186, + + /* TIB: Reports faults from a vtile which has not yet been allocated */ HW_ISSUE_8245, + + /* uTLB deadlock could occur when writing to an invalid page at the + * same time as access to a valid page in the same uTLB cache line (== + * 4 PTEs == 16K block of mapping) */ HW_ISSUE_8316, + + /* HT: TERMINATE for RUN command ignored if previous LOAD_DESCRIPTOR is + * still executing */ HW_ISSUE_8394, + + /* CSE: Sends a TERMINATED response for a task that should not be + * terminated */ HW_ISSUE_8401, + + /* Repeatedly Soft-stopping a job chain consisting of (Vertex Shader, + * Cache Flush, Tiler) jobs causes DATA_INVALID_FAULT on tiler job. */ HW_ISSUE_8408, + + /* Disable the Pause Buffer in the LS pipe. */ HW_ISSUE_8443, + + /* Change in RMUs in use causes problems related to the core's SDC */ HW_ISSUE_8987, + + /* Compute endpoint has a 4-deep queue of tasks, meaning a soft stop + * won't complete until all 4 tasks have completed */ HW_ISSUE_9435, + + /* HT: Tiler returns TERMINATED for non-terminated command */ HW_ISSUE_9510, + + /* Occasionally the GPU will issue multiple page faults for the same + * address before the MMU page table has been read by the GPU */ HW_ISSUE_9630, + + /* RA DCD load request to SDC returns invalid load ignore causing + * colour buffer mismatch */ HW_ISSUE_10327, + + /* MMU TLB invalidation hazards */ HW_ISSUE_10649, + + /* Missing cache flush in multi core-group configuration */ HW_ISSUE_10676, + + /* Chicken bit on T72X for a hardware workaround in compiler */ HW_ISSUE_10797, + + /* Soft-stopping fragment jobs might fail with TILE_RANGE_FAULT */ HW_ISSUE_10817, + + /* Intermittent missing interrupt on job completion */ HW_ISSUE_10883, + + /* Soft-stopping fragment jobs might fail with TILE_RANGE_ERROR + * (similar to issue 10817) and can use #10817 workaround */ HW_ISSUE_10959, + + /* Soft-stopped fragment shader job can restart with out-of-bound + * restart index */ HW_ISSUE_10969, + + /* Race condition can cause tile list corruption */ HW_ISSUE_11020, + + /* Write buffer can cause tile list corruption */ HW_ISSUE_11024, + + /* Pause buffer can cause a fragment job hang */ HW_ISSUE_11035, + + /* Dynamic Core Scaling not supported due to errata */ HW_ISSUE_11056, + + /* Clear encoder state for a hard stopped fragment job which is AFBC + * encoded by soft resetting the GPU. 
Only for T76X r0p0, r0p1 and + * r0p1_50rel0 */ HW_ISSUE_T76X_3542, + + /* Keep tiler module clock on to prevent GPU stall */ HW_ISSUE_T76X_3953, + + /* Must ensure L2 is not transitioning when we reset. Workaround with a + * busy wait until L2 completes transition; ensure there is a maximum + * loop count as it may never complete its transition. (On chips + * without this erratum, it's totally okay if the L2 transitions.) */ HW_ISSUE_TMIX_8463, + + /* Don't set SC_LS_ATTR_CHECK_DISABLE/SC_LS_ALLOW_ATTR_TYPES */ GPUCORE_1619, + + /* When a hard-stop follows close after a soft-stop, the completion + * code for the terminated job may be incorrectly set to STOPPED */ HW_ISSUE_TMIX_8438, + + /* "Protected mode" is buggy on some Bifrost chips (e.g. Mali-G31), so + * the kernel must fiddle with the L2 caches to prevent data leakage */ HW_ISSUE_TGOX_R1_1234, + HW_ISSUE_END }; diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 9bb9260d9181..7c4208476fbd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -4,9 +4,10 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> -#include <linux/reservation.h> +#include <linux/dma-resv.h> #include <drm/gpu_scheduler.h> #include <drm/panfrost_drm.h> @@ -20,12 +21,13 @@ #include "panfrost_gpu.h" #include "panfrost_mmu.h" +#define JOB_TIMEOUT_MS 500 + #define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) #define job_read(dev, reg) readl(dev->iomem + (reg)) struct panfrost_queue_state { struct drm_gpu_scheduler sched; - u64 fence_context; u64 emit_seqno; }; @@ -33,6 +35,7 @@ struct panfrost_queue_state { struct panfrost_job_slot { struct panfrost_queue_state queue[NUM_JOB_SLOTS]; spinlock_t job_lock; + int irq; }; static struct panfrost_job * @@ -99,7 +102,7 @@ static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, in return &fence->base; } -static int panfrost_job_get_slot(struct panfrost_job *job) +int panfrost_job_get_slot(struct panfrost_job *job) { /* JS0: fragment jobs. * JS1: vertex/tiler jobs @@ -134,39 +137,83 @@ static void panfrost_job_write_affinity(struct panfrost_device *pfdev, */ affinity = pfdev->features.shader_present; - job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity & 0xFFFFFFFF); - job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32); + job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity)); + job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity)); +} + +static u32 +panfrost_get_job_chain_flag(const struct panfrost_job *job) +{ + struct panfrost_fence *f = to_panfrost_fence(job->done_fence); + + if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) + return 0; + + return (f->seqno & 1) ? 
JS_CONFIG_JOB_CHAIN_FLAG : 0; +} + +static struct panfrost_job * +panfrost_dequeue_job(struct panfrost_device *pfdev, int slot) +{ + struct panfrost_job *job = pfdev->jobs[slot][0]; + + WARN_ON(!job); + pfdev->jobs[slot][0] = pfdev->jobs[slot][1]; + pfdev->jobs[slot][1] = NULL; + + return job; +} + +static unsigned int +panfrost_enqueue_job(struct panfrost_device *pfdev, int slot, + struct panfrost_job *job) +{ + if (WARN_ON(!job)) + return 0; + + if (!pfdev->jobs[slot][0]) { + pfdev->jobs[slot][0] = job; + return 0; + } + + WARN_ON(pfdev->jobs[slot][1]); + pfdev->jobs[slot][1] = job; + WARN_ON(panfrost_get_job_chain_flag(job) == + panfrost_get_job_chain_flag(pfdev->jobs[slot][0])); + return 1; } static void panfrost_job_hw_submit(struct panfrost_job *job, int js) { struct panfrost_device *pfdev = job->pfdev; - unsigned long flags; + unsigned int subslot; u32 cfg; u64 jc_head = job->jc; int ret; + panfrost_devfreq_record_busy(&pfdev->pfdevfreq); + ret = pm_runtime_get_sync(pfdev->dev); if (ret < 0) return; - if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) - goto end; + if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) { + return; + } - panfrost_devfreq_record_transition(pfdev, js); - spin_lock_irqsave(&pfdev->hwaccess_lock, flags); + cfg = panfrost_mmu_as_get(pfdev, job->mmu); - job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF); - job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32); + job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head)); + job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head)); panfrost_job_write_affinity(pfdev, job->requirements, js); /* start MMU, medium priority, cache clean/flush on end, clean/flush on * start */ - /* TODO: different address spaces */ - cfg = JS_CONFIG_THREAD_PRI(8) | + cfg |= JS_CONFIG_THREAD_PRI(8) | JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | - JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE | + panfrost_get_job_chain_flag(job); if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; @@ -180,26 +227,38 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id); /* GO ! 
*/ - dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx", - job, js, jc_head); - - job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); - - spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags); -end: - pm_runtime_mark_last_busy(pfdev->dev); - pm_runtime_put_autosuspend(pfdev->dev); + spin_lock(&pfdev->js->job_lock); + subslot = panfrost_enqueue_job(pfdev, js, job); + /* Don't queue the job if a reset is in progress */ + if (!atomic_read(&pfdev->reset.pending)) { + job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); + dev_dbg(pfdev->dev, + "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d", + job, js, subslot, jc_head, cfg & 0xf); + } + spin_unlock(&pfdev->js->job_lock); } -static void panfrost_acquire_object_fences(struct drm_gem_object **bos, - int bo_count, - struct dma_fence **implicit_fences) +static int panfrost_acquire_object_fences(struct drm_gem_object **bos, + int bo_count, + struct drm_sched_job *job) { - int i; + int i, ret; - for (i = 0; i < bo_count; i++) - implicit_fences[i] = reservation_object_get_excl_rcu(bos[i]->resv); + for (i = 0; i < bo_count; i++) { + ret = dma_resv_reserve_fences(bos[i]->resv, 1); + if (ret) + return ret; + + /* panfrost always uses write mode in its current uapi */ + ret = drm_sched_job_add_implicit_dependencies(job, bos[i], + true); + if (ret) + return ret; + } + + return 0; } static void panfrost_attach_object_fences(struct drm_gem_object **bos, @@ -209,40 +268,35 @@ static void panfrost_attach_object_fences(struct drm_gem_object **bos, int i; for (i = 0; i < bo_count; i++) - reservation_object_add_excl_fence(bos[i]->resv, fence); + dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); } int panfrost_job_push(struct panfrost_job *job) { struct panfrost_device *pfdev = job->pfdev; - int slot = panfrost_job_get_slot(job); - struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot]; struct ww_acquire_ctx acquire_ctx; int ret = 0; - mutex_lock(&pfdev->sched_lock); - ret = drm_gem_lock_reservations(job->bos, job->bo_count, &acquire_ctx); - if (ret) { - mutex_unlock(&pfdev->sched_lock); + if (ret) return ret; - } - ret = drm_sched_job_init(&job->base, entity, NULL); + mutex_lock(&pfdev->sched_lock); + drm_sched_job_arm(&job->base); + + job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); + + ret = panfrost_acquire_object_fences(job->bos, job->bo_count, + &job->base); if (ret) { mutex_unlock(&pfdev->sched_lock); goto unlock; } - job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); - kref_get(&job->refcount); /* put by scheduler job completion */ - panfrost_acquire_object_fences(job->bos, job->bo_count, - job->implicit_fences); - - drm_sched_entity_push_job(&job->base, entity); + drm_sched_entity_push_job(&job->base); mutex_unlock(&pfdev->sched_lock); @@ -261,22 +315,24 @@ static void panfrost_job_cleanup(struct kref *ref) refcount); unsigned int i; - if (job->in_fences) { - for (i = 0; i < job->in_fence_count; i++) - dma_fence_put(job->in_fences[i]); - kvfree(job->in_fences); - } - if (job->implicit_fences) { - for (i = 0; i < job->bo_count; i++) - dma_fence_put(job->implicit_fences[i]); - kvfree(job->implicit_fences); - } dma_fence_put(job->done_fence); dma_fence_put(job->render_done_fence); + if (job->mappings) { + for (i = 0; i < job->bo_count; i++) { + if (!job->mappings[i]) + break; + + atomic_dec(&job->mappings[i]->obj->gpu_usecount); + panfrost_gem_mapping_put(job->mappings[i]); + } + kvfree(job->mappings); + } + if (job->bos) { for (i = 0; i < 
job->bo_count; i++) - drm_gem_object_put_unlocked(job->bos[i]); + drm_gem_object_put(job->bos[i]); + kvfree(job->bos); } @@ -297,34 +353,6 @@ static void panfrost_job_free(struct drm_sched_job *sched_job) panfrost_job_put(job); } -static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job, - struct drm_sched_entity *s_entity) -{ - struct panfrost_job *job = to_panfrost_job(sched_job); - struct dma_fence *fence; - unsigned int i; - - /* Explicit fences */ - for (i = 0; i < job->in_fence_count; i++) { - if (job->in_fences[i]) { - fence = job->in_fences[i]; - job->in_fences[i] = NULL; - return fence; - } - } - - /* Implicit fences, max. one per BO */ - for (i = 0; i < job->bo_count; i++) { - if (job->implicit_fences[i]) { - fence = job->implicit_fences[i]; - job->implicit_fences[i] = NULL; - return fence; - } - } - - return NULL; -} - static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) { struct panfrost_job *job = to_panfrost_job(sched_job); @@ -335,11 +363,15 @@ static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) if (unlikely(job->base.s_fence->finished.error)) return NULL; - pfdev->jobs[slot] = job; + /* Nothing to execute: can happen if the job has finished while + * we were resetting the GPU. + */ + if (!job->jc) + return NULL; fence = panfrost_fence_create(pfdev, slot); if (IS_ERR(fence)) - return NULL; + return fence; if (job->done_fence) dma_fence_put(job->done_fence); @@ -363,135 +395,428 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) job_write(pfdev, JOB_INT_MASK, irq_mask); } -static void panfrost_job_timedout(struct drm_sched_job *sched_job) +static void panfrost_job_handle_err(struct panfrost_device *pfdev, + struct panfrost_job *job, + unsigned int js) +{ + u32 js_status = job_read(pfdev, JS_STATUS(js)); + const char *exception_name = panfrost_exception_name(js_status); + bool signal_fence = true; + + if (!panfrost_exception_is_fault(js_status)) { + dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x", + js, exception_name, + job_read(pfdev, JS_HEAD_LO(js)), + job_read(pfdev, JS_TAIL_LO(js))); + } else { + dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", + js, exception_name, + job_read(pfdev, JS_HEAD_LO(js)), + job_read(pfdev, JS_TAIL_LO(js))); + } + + if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) { + /* Update the job head so we can resume */ + job->jc = job_read(pfdev, JS_TAIL_LO(js)) | + ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32); + + /* The job will be resumed, don't signal the fence */ + signal_fence = false; + } else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) { + /* Job has been hard-stopped, flag it as canceled */ + dma_fence_set_error(job->done_fence, -ECANCELED); + job->jc = 0; + } else if (panfrost_exception_is_fault(js_status)) { + /* We might want to provide finer-grained error code based on + * the exception type, but unconditionally setting to EINVAL + * is good enough for now. 
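+ * Userspace can still observe the failure through the fence error + * status on job->done_fence once it signals.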
+ */ + dma_fence_set_error(job->done_fence, -EINVAL); + job->jc = 0; + } + + panfrost_mmu_as_put(pfdev, job->mmu); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + + if (signal_fence) + dma_fence_signal_locked(job->done_fence); + + pm_runtime_put_autosuspend(pfdev->dev); + + if (panfrost_exception_needs_reset(pfdev, js_status)) { + atomic_set(&pfdev->reset.pending, 1); + drm_sched_fault(&pfdev->js->queue[js].sched); + } +} + +static void panfrost_job_handle_done(struct panfrost_device *pfdev, + struct panfrost_job *job) +{ + /* Set ->jc to 0 to avoid re-submitting an already finished job (can + * happen when we receive the DONE interrupt while doing a GPU reset). + */ + job->jc = 0; + panfrost_mmu_as_put(pfdev, job->mmu); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + + dma_fence_signal_locked(job->done_fence); + pm_runtime_put_autosuspend(pfdev->dev); +} + +static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status) +{ + struct panfrost_job *done[NUM_JOB_SLOTS][2] = {}; + struct panfrost_job *failed[NUM_JOB_SLOTS] = {}; + u32 js_state = 0, js_events = 0; + unsigned int i, j; + + /* First we collect all failed/done jobs. */ + while (status) { + u32 js_state_mask = 0; + + for (j = 0; j < NUM_JOB_SLOTS; j++) { + if (status & MK_JS_MASK(j)) + js_state_mask |= MK_JS_MASK(j); + + if (status & JOB_INT_MASK_DONE(j)) { + if (done[j][0]) + done[j][1] = panfrost_dequeue_job(pfdev, j); + else + done[j][0] = panfrost_dequeue_job(pfdev, j); + } + + if (status & JOB_INT_MASK_ERR(j)) { + /* Cancel the next submission. Will be submitted + * after we're done handling this failure if + * there's no reset pending. + */ + job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); + failed[j] = panfrost_dequeue_job(pfdev, j); + } + } + + /* JS_STATE is sampled when JOB_INT_CLEAR is written. + * For each BIT(slot) or BIT(slot + 16) bit written to + * JOB_INT_CLEAR, the corresponding bits in JS_STATE + * (BIT(slot) and BIT(slot + 16)) are updated, but this + * is racy. If we only have one job done at the time we + * read JOB_INT_RAWSTAT but the second job fails before we + * clear the status, we end up with a status containing + * only the DONE bit and consider both jobs as DONE since + * JS_STATE reports both NEXT and CURRENT as inactive. + * To prevent that, let's repeat this clear+read steps + * until status is 0. + */ + job_write(pfdev, JOB_INT_CLEAR, status); + js_state &= ~js_state_mask; + js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask; + js_events |= status; + status = job_read(pfdev, JOB_INT_RAWSTAT); + } + + /* Then we handle the dequeued jobs. */ + for (j = 0; j < NUM_JOB_SLOTS; j++) { + if (!(js_events & MK_JS_MASK(j))) + continue; + + if (failed[j]) { + panfrost_job_handle_err(pfdev, failed[j], j); + } else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) { + /* When the current job doesn't fail, the JM dequeues + * the next job without waiting for an ACK, this means + * we can have 2 jobs dequeued and only catch the + * interrupt when the second one is done. If both slots + * are inactive, but one job remains in pfdev->jobs[j], + * consider it done. Of course that doesn't apply if a + * failure happened since we cancelled execution of the + * job in _NEXT (see above). 
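+ * + * Concrete example: job A is running and job B sits in _NEXT. A + * completes, the JM immediately starts B, and B also completes before + * we read JOB_INT_RAWSTAT; we then observe one DONE event with both + * CURRENT and NEXT inactive, so A *and* B must be dequeued here.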
+ */ + if (WARN_ON(!done[j][0])) + done[j][0] = panfrost_dequeue_job(pfdev, j); + else + done[j][1] = panfrost_dequeue_job(pfdev, j); + } + + for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++) + panfrost_job_handle_done(pfdev, done[j][i]); + } + + /* And finally we requeue jobs that were waiting in the second slot + * and have been stopped if we detected a failure on the first slot. + */ + for (j = 0; j < NUM_JOB_SLOTS; j++) { + if (!(js_events & MK_JS_MASK(j))) + continue; + + if (!failed[j] || !pfdev->jobs[j][0]) + continue; + + if (pfdev->jobs[j][0]->jc == 0) { + /* The job was cancelled, signal the fence now */ + struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j); + + dma_fence_set_error(canceled->done_fence, -ECANCELED); + panfrost_job_handle_done(pfdev, canceled); + } else if (!atomic_read(&pfdev->reset.pending)) { + /* Requeue the job we removed if no reset is pending */ + job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START); + } + } +} + +static void panfrost_job_handle_irqs(struct panfrost_device *pfdev) +{ + u32 status = job_read(pfdev, JOB_INT_RAWSTAT); + + while (status) { + pm_runtime_mark_last_busy(pfdev->dev); + + spin_lock(&pfdev->js->job_lock); + panfrost_job_handle_irq(pfdev, status); + spin_unlock(&pfdev->js->job_lock); + status = job_read(pfdev, JOB_INT_RAWSTAT); + } +} + +static u32 panfrost_active_slots(struct panfrost_device *pfdev, + u32 *js_state_mask, u32 js_state) +{ + u32 rawstat; + + if (!(js_state & *js_state_mask)) + return 0; + + rawstat = job_read(pfdev, JOB_INT_RAWSTAT); + if (rawstat) { + unsigned int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + if (rawstat & MK_JS_MASK(i)) + *js_state_mask &= ~MK_JS_MASK(i); + } + } + + return js_state & *js_state_mask; +} + +static void +panfrost_reset(struct panfrost_device *pfdev, + struct drm_sched_job *bad) +{ + u32 js_state, js_state_mask = 0xffffffff; + unsigned int i, j; + bool cookie; + int ret; + + if (!atomic_read(&pfdev->reset.pending)) + return; + + /* Stop the schedulers. + * + * FIXME: We temporarily get out of the dma_fence_signalling section + * because the cleanup path generates lockdep splats when taking locks + * to release job resources. We should rework the code to follow this + * pattern: + * + * try_lock + * if (locked) + * release + * else + * schedule_work_to_release_later + */ + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_stop(&pfdev->js->queue[i].sched, bad); + + cookie = dma_fence_begin_signalling(); + + if (bad) + drm_sched_increase_karma(bad); + + /* Mask job interrupts and synchronize to make sure we won't be + * interrupted during our reset. + */ + job_write(pfdev, JOB_INT_MASK, 0); + synchronize_irq(pfdev->js->irq); + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* Cancel the next job and soft-stop the running job. */ + job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); + job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP); + } + + /* Wait at most 10ms for soft-stops to complete */ + ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state, + !panfrost_active_slots(pfdev, &js_state_mask, js_state), + 10, 10000); + + if (ret) + dev_err(pfdev->dev, "Soft-stop failed\n"); + + /* Handle the remaining interrupts before we reset. */ + panfrost_job_handle_irqs(pfdev); + + /* Remaining interrupts have been handled, but we might still have + * stuck jobs. Let's make sure the PM counters stay balanced by + * manually calling pm_runtime_put_noidle() and + * panfrost_devfreq_record_idle() for each stuck job. 
+ */ + spin_lock(&pfdev->js->job_lock); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) { + pm_runtime_put_noidle(pfdev->dev); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + } + } + memset(pfdev->jobs, 0, sizeof(pfdev->jobs)); + spin_unlock(&pfdev->js->job_lock); + + /* Proceed with reset now. */ + panfrost_device_reset(pfdev); + + /* panfrost_device_reset() unmasks job interrupts, but we want to + * keep them masked a bit longer. + */ + job_write(pfdev, JOB_INT_MASK, 0); + + /* GPU has been reset, we can clear the reset pending bit. */ + atomic_set(&pfdev->reset.pending, 0); + + /* Now resubmit jobs that were previously queued but didn't have a + * chance to finish. + * FIXME: We temporarily get out of the DMA fence signalling section + * while resubmitting jobs because the job submission logic will + * allocate memory with the GFP_KERNEL flag which can trigger memory + * reclaim and expose a lock ordering issue. + */ + dma_fence_end_signalling(cookie); + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched); + cookie = dma_fence_begin_signalling(); + + /* Restart the schedulers */ + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_start(&pfdev->js->queue[i].sched, true); + + /* Re-enable job interrupts now that everything has been restarted. */ + job_write(pfdev, JOB_INT_MASK, + GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | + GENMASK(NUM_JOB_SLOTS - 1, 0)); + + dma_fence_end_signalling(cookie); +} + +static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job + *sched_job) { struct panfrost_job *job = to_panfrost_job(sched_job); struct panfrost_device *pfdev = job->pfdev; int js = panfrost_job_get_slot(job); - int i; /* * If the GPU managed to complete this job's fence, the timeout is * spurious. Bail out. 
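* (This can happen when the job completes in the window between the * scheduler queueing the timeout handler and the handler actually * running.)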
*/ if (dma_fence_is_signaled(job->done_fence)) - return; + return DRM_GPU_SCHED_STAT_NOMINAL; - dev_err(pfdev->dev, "gpu sched timeout, js=%d, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", + dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", js, + job_read(pfdev, JS_CONFIG(js)), job_read(pfdev, JS_STATUS(js)), job_read(pfdev, JS_HEAD_LO(js)), job_read(pfdev, JS_TAIL_LO(js)), sched_job); - mutex_lock(&pfdev->reset_lock); - - for (i = 0; i < NUM_JOB_SLOTS; i++) - drm_sched_stop(&pfdev->js->queue[i].sched, sched_job); - - if (sched_job) - drm_sched_increase_karma(sched_job); - - /* panfrost_core_dump(pfdev); */ - - panfrost_devfreq_record_transition(pfdev, js); - panfrost_gpu_soft_reset(pfdev); + atomic_set(&pfdev->reset.pending, 1); + panfrost_reset(pfdev, sched_job); - /* TODO: Re-enable all other address spaces */ - panfrost_mmu_enable(pfdev, 0); - panfrost_gpu_power_on(pfdev); - panfrost_job_enable_interrupts(pfdev); - - for (i = 0; i < NUM_JOB_SLOTS; i++) - drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched); + return DRM_GPU_SCHED_STAT_NOMINAL; +} - /* restart scheduler after GPU is usable again */ - for (i = 0; i < NUM_JOB_SLOTS; i++) - drm_sched_start(&pfdev->js->queue[i].sched, true); +static void panfrost_reset_work(struct work_struct *work) +{ + struct panfrost_device *pfdev; - mutex_unlock(&pfdev->reset_lock); + pfdev = container_of(work, struct panfrost_device, reset.work); + panfrost_reset(pfdev, NULL); } static const struct drm_sched_backend_ops panfrost_sched_ops = { - .dependency = panfrost_job_dependency, .run_job = panfrost_job_run, .timedout_job = panfrost_job_timedout, .free_job = panfrost_job_free }; +static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + + panfrost_job_handle_irqs(pfdev); + job_write(pfdev, JOB_INT_MASK, + GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | + GENMASK(NUM_JOB_SLOTS - 1, 0)); + return IRQ_HANDLED; +} + static irqreturn_t panfrost_job_irq_handler(int irq, void *data) { struct panfrost_device *pfdev = data; u32 status = job_read(pfdev, JOB_INT_STAT); - int j; - - dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status); if (!status) return IRQ_NONE; - pm_runtime_mark_last_busy(pfdev->dev); - - for (j = 0; status; j++) { - u32 mask = MK_JS_MASK(j); - - if (!(status & mask)) - continue; - - job_write(pfdev, JOB_INT_CLEAR, mask); - - if (status & JOB_INT_MASK_ERR(j)) { - job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); - - dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", - j, - panfrost_exception_name(pfdev, job_read(pfdev, JS_STATUS(j))), - job_read(pfdev, JS_HEAD_LO(j)), - job_read(pfdev, JS_TAIL_LO(j))); - - drm_sched_fault(&pfdev->js->queue[j].sched); - } - - if (status & JOB_INT_MASK_DONE(j)) { - panfrost_devfreq_record_transition(pfdev, j); - dma_fence_signal(pfdev->jobs[j]->done_fence); - } - - status &= ~mask; - } - - return IRQ_HANDLED; + job_write(pfdev, JOB_INT_MASK, 0); + return IRQ_WAKE_THREAD; } int panfrost_job_init(struct panfrost_device *pfdev) { struct panfrost_job_slot *js; - int ret, j, irq; + unsigned int nentries = 2; + int ret, j; + + /* All GPUs have two entries per queue, but without jobchain + * disambiguation stopping the right job in the close path is tricky, + * so let's just advertise one entry in that case. 
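+ * + * With two entries in flight, stopping only the queued job requires the + * flag-selective JS_COMMAND_HARD_STOP_0/1 commands keyed on + * panfrost_get_job_chain_flag() (see panfrost_job_close() below); plain + * JS_COMMAND_HARD_STOP cannot say which of the two job chains to stop.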
+ */ + if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) + nentries = 1; pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL); if (!js) return -ENOMEM; + INIT_WORK(&pfdev->reset.work, panfrost_reset_work); spin_lock_init(&js->job_lock); - irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); - if (irq <= 0) + js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); + if (js->irq <= 0) return -ENODEV; - ret = devm_request_irq(pfdev->dev, irq, panfrost_job_irq_handler, - IRQF_SHARED, "job", pfdev); + ret = devm_request_threaded_irq(pfdev->dev, js->irq, + panfrost_job_irq_handler, + panfrost_job_irq_handler_thread, + IRQF_SHARED, KBUILD_MODNAME "-job", + pfdev); if (ret) { dev_err(pfdev->dev, "failed to request job irq"); return ret; } + pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0); + if (!pfdev->reset.wq) + return -ENOMEM; + for (j = 0; j < NUM_JOB_SLOTS; j++) { js->queue[j].fence_context = dma_fence_context_alloc(1); ret = drm_sched_init(&js->queue[j].sched, &panfrost_sched_ops, - 1, 0, msecs_to_jiffies(500), - "pan_js"); + nentries, 0, + msecs_to_jiffies(JOB_TIMEOUT_MS), + pfdev->reset.wq, + NULL, "pan_js", pfdev->dev); if (ret) { dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); goto err_sched; @@ -506,6 +831,7 @@ err_sched: for (j--; j >= 0; j--) drm_sched_fini(&js->queue[j].sched); + destroy_workqueue(pfdev->reset.wq); return ret; } @@ -516,21 +842,26 @@ void panfrost_job_fini(struct panfrost_device *pfdev) job_write(pfdev, JOB_INT_MASK, 0); - for (j = 0; j < NUM_JOB_SLOTS; j++) + for (j = 0; j < NUM_JOB_SLOTS; j++) { drm_sched_fini(&js->queue[j].sched); + } + cancel_work_sync(&pfdev->reset.work); + destroy_workqueue(pfdev->reset.wq); } int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) { struct panfrost_device *pfdev = panfrost_priv->pfdev; struct panfrost_job_slot *js = pfdev->js; - struct drm_sched_rq *rq; + struct drm_gpu_scheduler *sched; int ret, i; for (i = 0; i < NUM_JOB_SLOTS; i++) { - rq = &js->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], &rq, 1, NULL); + sched = &js->queue[i].sched; + ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], + DRM_SCHED_PRIORITY_NORMAL, &sched, + 1, NULL); if (WARN_ON(ret)) return ret; } @@ -539,10 +870,46 @@ int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) void panfrost_job_close(struct panfrost_file_priv *panfrost_priv) { + struct panfrost_device *pfdev = panfrost_priv->pfdev; int i; for (i = 0; i < NUM_JOB_SLOTS; i++) drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]); + + /* Kill in-flight jobs */ + spin_lock(&pfdev->js->job_lock); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i]; + int j; + + for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) { + struct panfrost_job *job = pfdev->jobs[i][j]; + u32 cmd; + + if (!job || job->base.entity != entity) + continue; + + if (j == 1) { + /* Try to cancel the job before it starts */ + job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); + /* Reset the job head so it doesn't get restarted if + * the job in the first slot failed. + */ + job->jc = 0; + } + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + cmd = panfrost_get_job_chain_flag(job) ? 
+ JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } else { + cmd = JS_COMMAND_HARD_STOP; + } + + job_write(pfdev, JS_COMMAND(i), cmd); + } + } + spin_unlock(&pfdev->js->job_lock); } int panfrost_job_is_idle(struct panfrost_device *pfdev) @@ -554,10 +921,6 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev) /* If there are any jobs in the HW queue, we're not idle */ if (atomic_read(&js->queue[i].sched.hw_rq_count)) return false; - - /* Check whether the hardware is idle */ - if (pfdev->devfreq.slot[i].busy) - return false; } return true; diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h index 62454128a792..8becc1ba0eb9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.h +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -17,11 +17,7 @@ struct panfrost_job { struct kref refcount; struct panfrost_device *pfdev; - struct panfrost_file_priv *file_priv; - - /* Optional fences userspace can pass in for the job to depend on. */ - struct dma_fence **in_fences; - u32 in_fence_count; + struct panfrost_mmu *mmu; /* Fence to be signaled by IRQ handler when the job is complete. */ struct dma_fence *done_fence; @@ -30,8 +26,7 @@ struct panfrost_job { __u32 requirements; __u32 flush_id; - /* Exclusive fences we have taken from the BOs to wait for */ - struct dma_fence **implicit_fences; + struct panfrost_gem_mapping **mappings; struct drm_gem_object **bos; u32 bo_count; @@ -43,6 +38,7 @@ int panfrost_job_init(struct panfrost_device *pfdev); void panfrost_job_fini(struct panfrost_device *pfdev); int panfrost_job_open(struct panfrost_file_priv *panfrost_priv); void panfrost_job_close(struct panfrost_file_priv *panfrost_priv); +int panfrost_job_get_slot(struct panfrost_job *job); int panfrost_job_push(struct panfrost_job *job); void panfrost_job_put(struct panfrost_job *job); void panfrost_job_enable_interrupts(struct panfrost_device *pfdev); diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 92ac995dd9c6..d3f82b26a631 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -1,7 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#include <drm/panfrost_drm.h> + +#include <linux/atomic.h> #include <linux/bitfield.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/iopoll.h> @@ -9,6 +14,7 @@ #include <linux/iommu.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/shmem_fs.h> #include <linux/sizes.h> #include "panfrost_device.h" @@ -20,12 +26,6 @@ #define mmu_write(dev, reg, data) writel(data, dev->iomem + reg) #define mmu_read(dev, reg) readl(dev->iomem + reg) -struct panfrost_mmu { - struct io_pgtable_cfg pgtbl_cfg; - struct io_pgtable_ops *pgtbl_ops; - struct mutex lock; -}; - static int wait_ready(struct panfrost_device *pfdev, u32 as_nr) { int ret; @@ -34,10 +34,13 @@ static int wait_ready(struct panfrost_device *pfdev, u32 as_nr) /* Wait for the MMU status to indicate there is no active command, in * case one is pending. 
*/ ret = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr), - val, !(val & AS_STATUS_AS_ACTIVE), 10, 1000); + val, !(val & AS_STATUS_AS_ACTIVE), 10, 100000); - if (ret) + if (ret) { + /* The GPU hung, let's trigger a reset */ + panfrost_device_schedule_reset(pfdev); dev_err(pfdev->dev, "AS_ACTIVE bit stuck\n"); + } return ret; } @@ -55,41 +58,46 @@ static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd) } static void lock_region(struct panfrost_device *pfdev, u32 as_nr, - u64 iova, size_t size) + u64 region_start, u64 size) { u8 region_width; - u64 region = iova & PAGE_MASK; + u64 region; + u64 region_end = region_start + size; + + if (!size) + return; + /* - * fls returns: - * 1 .. 32 - * - * 10 + fls(num_pages) - * results in the range (11 .. 42) + * The locked region is a naturally aligned power of 2 block encoded as + * log2 minus(1). + * Calculate the desired start/end and look for the highest bit which + * differs. The smallest naturally aligned block must include this bit + * change, the desired region starts with this bit (and subsequent bits) + * zeroed and ends with the bit (and subsequent bits) set to one. */ + region_width = max(fls64(region_start ^ (region_end - 1)), + const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1; - size = round_up(size, PAGE_SIZE); + /* + * Mask off the low bits of region_start (which would be ignored by + * the hardware anyway) + */ + region_start &= GENMASK_ULL(63, region_width); - region_width = 10 + fls(size >> PAGE_SHIFT); - if ((size >> PAGE_SHIFT) != (1ul << (region_width - 11))) { - /* not pow2, so must go up to the next pow2 */ - region_width += 1; - } - region |= region_width; + region = region_width | region_start; /* Lock the region that needs to be updated */ - mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), region & 0xFFFFFFFFUL); - mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), (region >> 32) & 0xFFFFFFFFUL); + mmu_write(pfdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region)); + mmu_write(pfdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region)); write_cmd(pfdev, as_nr, AS_COMMAND_LOCK); } -static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr, - u64 iova, size_t size, u32 op) +static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr, + u64 iova, u64 size, u32 op) { - unsigned long flags; - int ret; - - spin_lock_irqsave(&pfdev->hwaccess_lock, flags); + if (as_nr < 0) + return 0; if (op != AS_COMMAND_UNLOCK) lock_region(pfdev, as_nr, iova, size); @@ -98,36 +106,46 @@ static int mmu_hw_do_operation(struct panfrost_device *pfdev, u32 as_nr, write_cmd(pfdev, as_nr, op); /* Wait for the flush to complete */ - ret = wait_ready(pfdev, as_nr); + return wait_ready(pfdev, as_nr); +} - spin_unlock_irqrestore(&pfdev->hwaccess_lock, flags); +static int mmu_hw_do_operation(struct panfrost_device *pfdev, + struct panfrost_mmu *mmu, + u64 iova, u64 size, u32 op) +{ + int ret; + spin_lock(&pfdev->as_lock); + ret = mmu_hw_do_operation_locked(pfdev, mmu->as, iova, size, op); + spin_unlock(&pfdev->as_lock); return ret; } -void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr) +static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) { - struct io_pgtable_cfg *cfg = &pfdev->mmu->pgtbl_cfg; + int as_nr = mmu->as; + struct io_pgtable_cfg *cfg = &mmu->pgtbl_cfg; u64 transtab = cfg->arm_mali_lpae_cfg.transtab; u64 memattr = cfg->arm_mali_lpae_cfg.memattr; - mmu_write(pfdev, MMU_INT_CLEAR, ~0); - mmu_write(pfdev, MMU_INT_MASK, ~0); + mmu_hw_do_operation_locked(pfdev, as_nr, 0, 
~0ULL, AS_COMMAND_FLUSH_MEM); - mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL); - mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32); + mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab)); + mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab)); /* Need to revisit mem attrs. * NC is the default, Mali driver is inner WT. */ - mmu_write(pfdev, AS_MEMATTR_LO(as_nr), memattr & 0xffffffffUL); - mmu_write(pfdev, AS_MEMATTR_HI(as_nr), memattr >> 32); + mmu_write(pfdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); + mmu_write(pfdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); } -static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr) +static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr) { + mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); + mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0); mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0); @@ -137,6 +155,99 @@ static void mmu_disable(struct panfrost_device *pfdev, u32 as_nr) write_cmd(pfdev, as_nr, AS_COMMAND_UPDATE); } +u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) +{ + int as; + + spin_lock(&pfdev->as_lock); + + as = mmu->as; + if (as >= 0) { + int en = atomic_inc_return(&mmu->as_count); + u32 mask = BIT(as) | BIT(16 + as); + + /* + * AS can be retained by active jobs or a perfcnt context, + * hence the '+ 1' here. + */ + WARN_ON(en >= (NUM_JOB_SLOTS + 1)); + + list_move(&mmu->list, &pfdev->as_lru_list); + + if (pfdev->as_faulty_mask & mask) { + /* Unhandled pagefault on this AS, the MMU was + * disabled. We need to re-enable the MMU after + * clearing+unmasking the AS interrupts. + */ + mmu_write(pfdev, MMU_INT_CLEAR, mask); + mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask); + pfdev->as_faulty_mask &= ~mask; + panfrost_mmu_enable(pfdev, mmu); + } + + goto out; + } + + /* Check for a free AS */ + as = ffz(pfdev->as_alloc_mask); + if (!(BIT(as) & pfdev->features.as_present)) { + struct panfrost_mmu *lru_mmu; + + list_for_each_entry_reverse(lru_mmu, &pfdev->as_lru_list, list) { + if (!atomic_read(&lru_mmu->as_count)) + break; + } + WARN_ON(&lru_mmu->list == &pfdev->as_lru_list); + + list_del_init(&lru_mmu->list); + as = lru_mmu->as; + + WARN_ON(as < 0); + lru_mmu->as = -1; + } + + /* Assign the free or reclaimed AS to the FD */ + mmu->as = as; + set_bit(as, &pfdev->as_alloc_mask); + atomic_set(&mmu->as_count, 1); + list_add(&mmu->list, &pfdev->as_lru_list); + + dev_dbg(pfdev->dev, "Assigned AS%d to mmu %p, alloc_mask=%lx", as, mmu, pfdev->as_alloc_mask); + + panfrost_mmu_enable(pfdev, mmu); + +out: + spin_unlock(&pfdev->as_lock); + return as; +} + +void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) +{ + atomic_dec(&mmu->as_count); + WARN_ON(atomic_read(&mmu->as_count) < 0); +} + +void panfrost_mmu_reset(struct panfrost_device *pfdev) +{ + struct panfrost_mmu *mmu, *mmu_tmp; + + spin_lock(&pfdev->as_lock); + + pfdev->as_alloc_mask = 0; + pfdev->as_faulty_mask = 0; + + list_for_each_entry_safe(mmu, mmu_tmp, &pfdev->as_lru_list, list) { + mmu->as = -1; + atomic_set(&mmu->as_count, 0); + list_del_init(&mmu->list); + } + + spin_unlock(&pfdev->as_lock); + + mmu_write(pfdev, MMU_INT_CLEAR, ~0); + mmu_write(pfdev, MMU_INT_MASK, ~0); +} + static size_t get_pgsize(u64 addr, size_t size) { if (addr & (SZ_2M - 1) || size < SZ_2M) @@ -145,123 +256,372 @@ static size_t get_pgsize(u64 addr, size_t size) return SZ_2M; } -int panfrost_mmu_map(struct 
panfrost_gem_object *bo) +static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, + struct panfrost_mmu *mmu, + u64 iova, u64 size) { - struct drm_gem_object *obj = &bo->base.base; - struct panfrost_device *pfdev = to_panfrost_device(obj->dev); - struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops; - u64 iova = bo->node.start << PAGE_SHIFT; - unsigned int count; - struct scatterlist *sgl; - struct sg_table *sgt; - int ret; + if (mmu->as < 0) + return; - if (WARN_ON(bo->is_mapped)) - return 0; + pm_runtime_get_noresume(pfdev->dev); - sgt = drm_gem_shmem_get_pages_sgt(obj); - if (WARN_ON(IS_ERR(sgt))) - return PTR_ERR(sgt); + /* Flush the PTs only if we're already awake */ + if (pm_runtime_active(pfdev->dev)) + mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT); - ret = pm_runtime_get_sync(pfdev->dev); - if (ret < 0) - return ret; + pm_runtime_put_sync_autosuspend(pfdev->dev); +} - mutex_lock(&pfdev->mmu->lock); +static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, + u64 iova, int prot, struct sg_table *sgt) +{ + unsigned int count; + struct scatterlist *sgl; + struct io_pgtable_ops *ops = mmu->pgtbl_ops; + u64 start_iova = iova; - for_each_sg(sgt->sgl, sgl, sgt->nents, count) { + for_each_sgtable_dma_sg(sgt, sgl, count) { unsigned long paddr = sg_dma_address(sgl); size_t len = sg_dma_len(sgl); - dev_dbg(pfdev->dev, "map: iova=%llx, paddr=%lx, len=%zx", iova, paddr, len); + dev_dbg(pfdev->dev, "map: as=%d, iova=%llx, paddr=%lx, len=%zx", mmu->as, iova, paddr, len); while (len) { size_t pgsize = get_pgsize(iova | paddr, len); - ops->map(ops, iova, paddr, pgsize, IOMMU_WRITE | IOMMU_READ); + ops->map(ops, iova, paddr, pgsize, prot, GFP_KERNEL); iova += pgsize; paddr += pgsize; len -= pgsize; } } - mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT, - bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT); + panfrost_mmu_flush_range(pfdev, mmu, start_iova, iova - start_iova); + + return 0; +} + +int panfrost_mmu_map(struct panfrost_gem_mapping *mapping) +{ + struct panfrost_gem_object *bo = mapping->obj; + struct drm_gem_shmem_object *shmem = &bo->base; + struct drm_gem_object *obj = &shmem->base; + struct panfrost_device *pfdev = to_panfrost_device(obj->dev); + struct sg_table *sgt; + int prot = IOMMU_READ | IOMMU_WRITE; - mutex_unlock(&pfdev->mmu->lock); + if (WARN_ON(mapping->active)) + return 0; - pm_runtime_mark_last_busy(pfdev->dev); - pm_runtime_put_autosuspend(pfdev->dev); - bo->is_mapped = true; + if (bo->noexec) + prot |= IOMMU_NOEXEC; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (WARN_ON(IS_ERR(sgt))) + return PTR_ERR(sgt); + + mmu_map_sg(pfdev, mapping->mmu, mapping->mmnode.start << PAGE_SHIFT, + prot, sgt); + mapping->active = true; return 0; } -void panfrost_mmu_unmap(struct panfrost_gem_object *bo) +void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping) { + struct panfrost_gem_object *bo = mapping->obj; struct drm_gem_object *obj = &bo->base.base; struct panfrost_device *pfdev = to_panfrost_device(obj->dev); - struct io_pgtable_ops *ops = pfdev->mmu->pgtbl_ops; - u64 iova = bo->node.start << PAGE_SHIFT; - size_t len = bo->node.size << PAGE_SHIFT; + struct io_pgtable_ops *ops = mapping->mmu->pgtbl_ops; + u64 iova = mapping->mmnode.start << PAGE_SHIFT; + size_t len = mapping->mmnode.size << PAGE_SHIFT; size_t unmapped_len = 0; - int ret; - - if (WARN_ON(!bo->is_mapped)) - return; - dev_dbg(pfdev->dev, "unmap: iova=%llx, len=%zx", iova, len); - - ret = pm_runtime_get_sync(pfdev->dev); - if (ret < 0) + if 
(WARN_ON(!mapping->active)) return; - mutex_lock(&pfdev->mmu->lock); + dev_dbg(pfdev->dev, "unmap: as=%d, iova=%llx, len=%zx", + mapping->mmu->as, iova, len); while (unmapped_len < len) { size_t unmapped_page; size_t pgsize = get_pgsize(iova, len - unmapped_len); - unmapped_page = ops->unmap(ops, iova, pgsize); - if (!unmapped_page) + if (ops->iova_to_phys(ops, iova)) { + unmapped_page = ops->unmap(ops, iova, pgsize, NULL); + WARN_ON(unmapped_page != pgsize); + } + iova += pgsize; + unmapped_len += pgsize; + } + + panfrost_mmu_flush_range(pfdev, mapping->mmu, + mapping->mmnode.start << PAGE_SHIFT, len); + mapping->active = false; +} + +static void mmu_tlb_inv_context_s1(void *cookie) +{} + +static void mmu_tlb_sync_context(void *cookie) +{ + //struct panfrost_mmu *mmu = cookie; + // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X +} + +static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, + void *cookie) +{ + mmu_tlb_sync_context(cookie); +} + +static const struct iommu_flush_ops mmu_tlb_ops = { + .tlb_flush_all = mmu_tlb_inv_context_s1, + .tlb_flush_walk = mmu_tlb_flush_walk, +}; + +static struct panfrost_gem_mapping * +addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr) +{ + struct panfrost_gem_mapping *mapping = NULL; + struct drm_mm_node *node; + u64 offset = addr >> PAGE_SHIFT; + struct panfrost_mmu *mmu; + + spin_lock(&pfdev->as_lock); + list_for_each_entry(mmu, &pfdev->as_lru_list, list) { + if (as == mmu->as) + goto found_mmu; + } + goto out; + +found_mmu: + + spin_lock(&mmu->mm_lock); + + drm_mm_for_each_node(node, &mmu->mm) { + if (offset >= node->start && + offset < (node->start + node->size)) { + mapping = drm_mm_node_to_panfrost_mapping(node); + + kref_get(&mapping->refcount); break; + } + } + + spin_unlock(&mmu->mm_lock); +out: + spin_unlock(&pfdev->as_lock); + return mapping; +} + +#define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE) - iova += unmapped_page; - unmapped_len += unmapped_page; +static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, + u64 addr) +{ + int ret, i; + struct panfrost_gem_mapping *bomapping; + struct panfrost_gem_object *bo; + struct address_space *mapping; + pgoff_t page_offset; + struct sg_table *sgt; + struct page **pages; + + bomapping = addr_to_mapping(pfdev, as, addr); + if (!bomapping) + return -ENOENT; + + bo = bomapping->obj; + if (!bo->is_heap) { + dev_WARN(pfdev->dev, "matching BO is not heap type (GPU VA = %llx)", + bomapping->mmnode.start << PAGE_SHIFT); + ret = -EINVAL; + goto err_bo; + } + WARN_ON(bomapping->mmu->as != as); + + /* Assume 2MB alignment and size multiple */ + addr &= ~((u64)SZ_2M - 1); + page_offset = addr >> PAGE_SHIFT; + page_offset -= bomapping->mmnode.start; + + mutex_lock(&bo->base.pages_lock); + + if (!bo->base.pages) { + bo->sgts = kvmalloc_array(bo->base.base.size / SZ_2M, + sizeof(struct sg_table), GFP_KERNEL | __GFP_ZERO); + if (!bo->sgts) { + mutex_unlock(&bo->base.pages_lock); + ret = -ENOMEM; + goto err_bo; + } + + pages = kvmalloc_array(bo->base.base.size >> PAGE_SHIFT, + sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); + if (!pages) { + kvfree(bo->sgts); + bo->sgts = NULL; + mutex_unlock(&bo->base.pages_lock); + ret = -ENOMEM; + goto err_bo; + } + bo->base.pages = pages; + bo->base.pages_use_count = 1; + } else { + pages = bo->base.pages; + if (pages[page_offset]) { + /* Pages are already mapped, bail out. 
*/ + mutex_unlock(&bo->base.pages_lock); + goto out; + } + } + + mapping = bo->base.base.filp->f_mapping; + mapping_set_unevictable(mapping); + + for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) { + pages[i] = shmem_read_mapping_page(mapping, i); + if (IS_ERR(pages[i])) { + mutex_unlock(&bo->base.pages_lock); + ret = PTR_ERR(pages[i]); + goto err_pages; + } } - mmu_hw_do_operation(pfdev, 0, bo->node.start << PAGE_SHIFT, - bo->node.size << PAGE_SHIFT, AS_COMMAND_FLUSH_PT); + mutex_unlock(&bo->base.pages_lock); - mutex_unlock(&pfdev->mmu->lock); + sgt = &bo->sgts[page_offset / (SZ_2M / PAGE_SIZE)]; + ret = sg_alloc_table_from_pages(sgt, pages + page_offset, + NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL); + if (ret) + goto err_pages; - pm_runtime_mark_last_busy(pfdev->dev); - pm_runtime_put_autosuspend(pfdev->dev); - bo->is_mapped = false; + ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0); + if (ret) + goto err_map; + + mmu_map_sg(pfdev, bomapping->mmu, addr, + IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt); + + bomapping->active = true; + + dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr); + +out: + panfrost_gem_mapping_put(bomapping); + + return 0; + +err_map: + sg_free_table(sgt); +err_pages: + drm_gem_shmem_put_pages(&bo->base); +err_bo: + drm_gem_object_put(&bo->base.base); + return ret; } -static void mmu_tlb_inv_context_s1(void *cookie) +static void panfrost_mmu_release_ctx(struct kref *kref) { - struct panfrost_device *pfdev = cookie; + struct panfrost_mmu *mmu = container_of(kref, struct panfrost_mmu, + refcount); + struct panfrost_device *pfdev = mmu->pfdev; + + spin_lock(&pfdev->as_lock); + if (mmu->as >= 0) { + pm_runtime_get_noresume(pfdev->dev); + if (pm_runtime_active(pfdev->dev)) + panfrost_mmu_disable(pfdev, mmu->as); + pm_runtime_put_autosuspend(pfdev->dev); + + clear_bit(mmu->as, &pfdev->as_alloc_mask); + clear_bit(mmu->as, &pfdev->as_in_use_mask); + list_del(&mmu->list); + } + spin_unlock(&pfdev->as_lock); - mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM); + free_io_pgtable_ops(mmu->pgtbl_ops); + drm_mm_takedown(&mmu->mm); + kfree(mmu); } -static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) -{} +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu) +{ + kref_put(&mmu->refcount, panfrost_mmu_release_ctx); +} -static void mmu_tlb_sync_context(void *cookie) +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu) { - //struct panfrost_device *pfdev = cookie; - // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X + kref_get(&mmu->refcount); + + return mmu; } -static const struct iommu_gather_ops mmu_tlb_ops = { - .tlb_flush_all = mmu_tlb_inv_context_s1, - .tlb_add_flush = mmu_tlb_inv_range_nosync, - .tlb_sync = mmu_tlb_sync_context, -}; +#define PFN_4G (SZ_4G >> PAGE_SHIFT) +#define PFN_4G_MASK (PFN_4G - 1) +#define PFN_16M (SZ_16M >> PAGE_SHIFT) + +static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, u64 *end) +{ + /* Executable buffers can't start or end on a 4GB boundary */ + if (!(color & PANFROST_BO_NOEXEC)) { + u64 next_seg; + + if ((*start & PFN_4G_MASK) == 0) + (*start)++; + + if ((*end & PFN_4G_MASK) == 0) + (*end)--; + + next_seg = ALIGN(*start, PFN_4G); + if (next_seg - *start <= PFN_16M) + *start = next_seg + 1; + + *end = min(*end, ALIGN(*start, PFN_4G) - 1); + } +} + +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev) +{ + struct panfrost_mmu *mmu; + + mmu = 
kzalloc(sizeof(*mmu), GFP_KERNEL); + if (!mmu) + return ERR_PTR(-ENOMEM); + + mmu->pfdev = pfdev; + spin_lock_init(&mmu->mm_lock); + + /* 4G enough for now. can be 48-bit */ + drm_mm_init(&mmu->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); + mmu->mm.color_adjust = panfrost_drm_mm_color_adjust; + + INIT_LIST_HEAD(&mmu->list); + mmu->as = -1; + + mmu->pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = SZ_4K | SZ_2M, + .ias = FIELD_GET(0xff, pfdev->features.mmu_features), + .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), + .coherent_walk = pfdev->coherent, + .tlb = &mmu_tlb_ops, + .iommu_dev = pfdev->dev, + }; + + mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, + mmu); + if (!mmu->pgtbl_ops) { + kfree(mmu); + return ERR_PTR(-EINVAL); + } + + kref_init(&mmu->refcount); + + return mmu; +} static const char *access_type_name(struct panfrost_device *pfdev, u32 fault_status) @@ -287,100 +647,106 @@ static const char *access_type_name(struct panfrost_device *pfdev, static irqreturn_t panfrost_mmu_irq_handler(int irq, void *data) { struct panfrost_device *pfdev = data; - u32 status = mmu_read(pfdev, MMU_INT_STAT); - int i; - if (!status) + if (!mmu_read(pfdev, MMU_INT_STAT)) return IRQ_NONE; - dev_err(pfdev->dev, "mmu irq status=%x\n", status); + mmu_write(pfdev, MMU_INT_MASK, 0); + return IRQ_WAKE_THREAD; +} - for (i = 0; status; i++) { - u32 mask = BIT(i) | BIT(i + 16); +static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) +{ + struct panfrost_device *pfdev = data; + u32 status = mmu_read(pfdev, MMU_INT_RAWSTAT); + int ret; + + while (status) { + u32 as = ffs(status | (status >> 16)) - 1; + u32 mask = BIT(as) | BIT(as + 16); u64 addr; u32 fault_status; u32 exception_type; u32 access_type; u32 source_id; - if (!(status & mask)) - continue; - - fault_status = mmu_read(pfdev, AS_FAULTSTATUS(i)); - addr = mmu_read(pfdev, AS_FAULTADDRESS_LO(i)); - addr |= (u64)mmu_read(pfdev, AS_FAULTADDRESS_HI(i)) << 32; + fault_status = mmu_read(pfdev, AS_FAULTSTATUS(as)); + addr = mmu_read(pfdev, AS_FAULTADDRESS_LO(as)); + addr |= (u64)mmu_read(pfdev, AS_FAULTADDRESS_HI(as)) << 32; /* decode the fault status */ exception_type = fault_status & 0xFF; access_type = (fault_status >> 8) & 0x3; source_id = (fault_status >> 16); - /* terminal fault, print info about the fault */ - dev_err(pfdev->dev, - "Unhandled Page fault in AS%d at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "decoded fault status: %s\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n", - i, addr, - "TODO", - fault_status, - (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), - exception_type, panfrost_exception_name(pfdev, exception_type), - access_type, access_type_name(pfdev, fault_status), - source_id); - mmu_write(pfdev, MMU_INT_CLEAR, mask); + /* Page fault only */ + ret = -1; + if ((status & mask) == BIT(as) && (exception_type & 0xF8) == 0xC0) + ret = panfrost_mmu_map_fault_addr(pfdev, as, addr); + + if (ret) { + /* terminal fault, print info about the fault */ + dev_err(pfdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n", + as, addr, + "TODO", + fault_status, + (fault_status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), + exception_type, panfrost_exception_name(exception_type), + access_type, access_type_name(pfdev, fault_status), + source_id); + + spin_lock(&pfdev->as_lock); + /* Ignore MMU interrupts on this AS until it's been + * re-enabled. + */ + pfdev->as_faulty_mask |= mask; + + /* Disable the MMU to kill jobs on this AS. */ + panfrost_mmu_disable(pfdev, as); + spin_unlock(&pfdev->as_lock); + } + status &= ~mask; + + /* If we received new MMU interrupts, process them before returning. */ + if (!status) + status = mmu_read(pfdev, MMU_INT_RAWSTAT) & ~pfdev->as_faulty_mask; } + spin_lock(&pfdev->as_lock); + mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask); + spin_unlock(&pfdev->as_lock); + return IRQ_HANDLED; }; int panfrost_mmu_init(struct panfrost_device *pfdev) { - struct io_pgtable_ops *pgtbl_ops; int err, irq; - pfdev->mmu = devm_kzalloc(pfdev->dev, sizeof(*pfdev->mmu), GFP_KERNEL); - if (!pfdev->mmu) - return -ENOMEM; - - mutex_init(&pfdev->mmu->lock); - irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "mmu"); if (irq <= 0) return -ENODEV; - err = devm_request_irq(pfdev->dev, irq, panfrost_mmu_irq_handler, - IRQF_SHARED, "mmu", pfdev); + err = devm_request_threaded_irq(pfdev->dev, irq, + panfrost_mmu_irq_handler, + panfrost_mmu_irq_handler_thread, + IRQF_SHARED, KBUILD_MODNAME "-mmu", + pfdev); if (err) { dev_err(pfdev->dev, "failed to request mmu irq"); return err; } - mmu_write(pfdev, MMU_INT_CLEAR, ~0); - mmu_write(pfdev, MMU_INT_MASK, ~0); - - pfdev->mmu->pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = SZ_4K | SZ_2M, - .ias = FIELD_GET(0xff, pfdev->features.mmu_features), - .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), - .tlb = &mmu_tlb_ops, - .iommu_dev = pfdev->dev, - }; - - pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &pfdev->mmu->pgtbl_cfg, - pfdev); - if (!pgtbl_ops) - return -ENOMEM; - - pfdev->mmu->pgtbl_ops = pgtbl_ops; - - panfrost_mmu_enable(pfdev, 0); return 0; } @@ -388,7 +754,4 @@ int panfrost_mmu_init(struct panfrost_device *pfdev) void panfrost_mmu_fini(struct panfrost_device *pfdev) { mmu_write(pfdev, MMU_INT_MASK, 0); - mmu_disable(pfdev, 0); - - free_io_pgtable_ops(pfdev->mmu->pgtbl_ops); } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h index f5878d86a5ce..cc2a0d307feb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.h +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h @@ -4,14 +4,22 @@ #ifndef __PANFROST_MMU_H__ #define __PANFROST_MMU_H__ -struct panfrost_gem_object; +struct panfrost_gem_mapping; +struct panfrost_file_priv; +struct panfrost_mmu; -int panfrost_mmu_map(struct panfrost_gem_object *bo); -void panfrost_mmu_unmap(struct panfrost_gem_object *bo); +int panfrost_mmu_map(struct panfrost_gem_mapping *mapping); +void panfrost_mmu_unmap(struct panfrost_gem_mapping *mapping); int panfrost_mmu_init(struct panfrost_device *pfdev); void panfrost_mmu_fini(struct panfrost_device *pfdev); +void panfrost_mmu_reset(struct panfrost_device *pfdev); -void panfrost_mmu_enable(struct panfrost_device *pfdev, u32 as_nr); +u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); +void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); + +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu); +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu); +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev); #endif diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c 
b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 83c57d325ca8..bc0df93f7f21 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -1,21 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright 2019 Collabora Ltd */ -#include <drm/drm_file.h> -#include <drm/drm_gem_shmem_helper.h> -#include <drm/panfrost_drm.h> #include <linux/completion.h> #include <linux/iopoll.h> +#include <linux/iosys-map.h> #include <linux/pm_runtime.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <drm/drm_file.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/panfrost_drm.h> + #include "panfrost_device.h" #include "panfrost_features.h" #include "panfrost_gem.h" #include "panfrost_issues.h" #include "panfrost_job.h" #include "panfrost_mmu.h" +#include "panfrost_perfcnt.h" #include "panfrost_regs.h" #define COUNTERS_PER_BLOCK 64 @@ -24,7 +27,7 @@ #define V4_SHADERS_PER_COREGROUP 4 struct panfrost_perfcnt { - struct panfrost_gem_object *bo; + struct panfrost_gem_mapping *mapping; size_t bosize; void *buf; struct panfrost_file_priv *user; @@ -48,9 +51,9 @@ static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) int ret; reinit_completion(&pfdev->perfcnt->dump_comp); - gpuva = pfdev->perfcnt->bo->node.start << PAGE_SHIFT; - gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva); - gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32); + gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT; + gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva)); + gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva)); gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_CLEAN_CACHES_COMPLETED | GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); @@ -66,12 +69,14 @@ static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) } static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, - struct panfrost_file_priv *user, + struct drm_file *file_priv, unsigned int counterset) { + struct panfrost_file_priv *user = file_priv->driver_priv; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct iosys_map map; struct drm_gem_shmem_object *bo; - u32 cfg; + u32 cfg, as; int ret; if (user == perfcnt->user) @@ -81,25 +86,31 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, ret = pm_runtime_get_sync(pfdev->dev); if (ret < 0) - return ret; + goto err_put_pm; bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - perfcnt->bo = to_panfrost_bo(&bo->base); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + goto err_put_pm; + } /* Map the perfcnt buf in the address space attached to file_priv. */ - ret = panfrost_mmu_map(perfcnt->bo); + ret = panfrost_gem_open(&bo->base, file_priv); if (ret) goto err_put_bo; - perfcnt->buf = drm_gem_shmem_vmap(&bo->base); - if (IS_ERR(perfcnt->buf)) { - ret = PTR_ERR(perfcnt->buf); - goto err_put_bo; + perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base), + user); + if (!perfcnt->mapping) { + ret = -EINVAL; + goto err_close_bo; } + ret = drm_gem_shmem_vmap(bo, &map); + if (ret) + goto err_put_mapping; + perfcnt->buf = map.vaddr; + /* * Invalidate the cache and clear the counters to start from a fresh * state. @@ -119,12 +130,8 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, perfcnt->user = user; - /* - * Always use address space 0 for now. - * FIXME: this needs to be updated when we start using different - * address space. 
- */ - cfg = GPU_PERFCNT_CFG_AS(0) | + as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu); + cfg = GPU_PERFCNT_CFG_AS(as) | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL); /* @@ -152,19 +159,30 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186)) gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); + /* The BO ref is retained by the mapping. */ + drm_gem_object_put(&bo->base); + return 0; err_vunmap: - drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf); + drm_gem_shmem_vunmap(bo, &map); +err_put_mapping: + panfrost_gem_mapping_put(perfcnt->mapping); +err_close_bo: + panfrost_gem_close(&bo->base, file_priv); err_put_bo: - drm_gem_object_put_unlocked(&bo->base); + drm_gem_object_put(&bo->base); +err_put_pm: + pm_runtime_put(pfdev->dev); return ret; } static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, - struct panfrost_file_priv *user) + struct drm_file *file_priv) { + struct panfrost_file_priv *user = file_priv->driver_priv; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; + struct iosys_map map = IOSYS_MAP_INIT_VADDR(perfcnt->buf); if (user != perfcnt->user) return -EINVAL; @@ -177,10 +195,12 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); perfcnt->user = NULL; - drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf); + drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base, &map); perfcnt->buf = NULL; - drm_gem_object_put_unlocked(&perfcnt->bo->base.base); - perfcnt->bo = NULL; + panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv); + panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu); + panfrost_gem_mapping_put(perfcnt->mapping); + perfcnt->mapping = NULL; pm_runtime_mark_last_busy(pfdev->dev); pm_runtime_put_autosuspend(pfdev->dev); @@ -190,7 +210,6 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, struct drm_file *file_priv) { - struct panfrost_file_priv *pfile = file_priv->driver_priv; struct panfrost_device *pfdev = dev->dev_private; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; struct drm_panfrost_perfcnt_enable *req = data; @@ -206,10 +225,10 @@ int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, mutex_lock(&perfcnt->lock); if (req->enable) - ret = panfrost_perfcnt_enable_locked(pfdev, pfile, + ret = panfrost_perfcnt_enable_locked(pfdev, file_priv, req->counterset); else - ret = panfrost_perfcnt_disable_locked(pfdev, pfile); + ret = panfrost_perfcnt_disable_locked(pfdev, file_priv); mutex_unlock(&perfcnt->lock); return ret; @@ -247,15 +266,16 @@ out: return ret; } -void panfrost_perfcnt_close(struct panfrost_file_priv *pfile) +void panfrost_perfcnt_close(struct drm_file *file_priv) { + struct panfrost_file_priv *pfile = file_priv->driver_priv; struct panfrost_device *pfdev = pfile->pfdev; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; pm_runtime_get_sync(pfdev->dev); mutex_lock(&perfcnt->lock); if (perfcnt->user == pfile) - panfrost_perfcnt_disable_locked(pfdev, pfile); + panfrost_perfcnt_disable_locked(pfdev, file_priv); mutex_unlock(&perfcnt->lock); pm_runtime_mark_last_busy(pfdev->dev); pm_runtime_put_autosuspend(pfdev->dev); diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h index 13b8fdaa1b43..8bbcf5f5fb33 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h +++ 
b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h @@ -9,7 +9,7 @@ void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev); void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev); int panfrost_perfcnt_init(struct panfrost_device *pfdev); void panfrost_perfcnt_fini(struct panfrost_device *pfdev); -void panfrost_perfcnt_close(struct panfrost_file_priv *pfile); +void panfrost_perfcnt_close(struct drm_file *file_priv); int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, struct drm_file *file_priv); int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index ea38ac60581c..0b6cd8fdcb47 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -51,6 +51,10 @@ #define GPU_STATUS 0x34 #define GPU_STATUS_PRFCNT_ACTIVE BIT(2) #define GPU_LATEST_FLUSH_ID 0x38 +#define GPU_PWR_KEY 0x50 /* (WO) Power manager key register */ +#define GPU_PWR_KEY_UNLOCK 0x2968A819 +#define GPU_PWR_OVERRIDE0 0x54 /* (RW) Power manager override settings */ +#define GPU_PWR_OVERRIDE1 0x58 /* (RW) Power manager override settings */ #define GPU_FAULT_STATUS 0x3C #define GPU_FAULT_ADDRESS_LO 0x40 #define GPU_FAULT_ADDRESS_HI 0x44 @@ -78,6 +82,7 @@ #define GPU_TEXTURE_FEATURES(n) (0x0B0 + ((n) * 4)) #define GPU_JS_FEATURES(n) (0x0C0 + ((n) * 4)) +#define GPU_AFBC_FEATURES (0x4C) /* (RO) AFBC support on Bifrost */ #define GPU_SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ #define GPU_SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ @@ -203,6 +208,7 @@ #define JM_MAX_JOB_THROTTLE_LIMIT 0x3F #define JM_FORCE_COHERENCY_FEATURES_SHIFT 2 #define JM_IDVS_GROUP_SIZE_SHIFT 16 +#define JM_DEFAULT_IDVS_GROUP_SIZE 0xF #define JM_MAX_IDVS_GROUP_SIZE 0x3F @@ -257,9 +263,6 @@ #define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ #define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_STATUS_EVENT_ACTIVE 0x08 - - /* MMU regs */ #define MMU_INT_RAWSTAT 0x2000 #define MMU_INT_CLEAR 0x2004 @@ -290,7 +293,7 @@ #define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) /* (RO) Fault Address for address space n, low word */ #define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) /* (RO) Fault Address for address space n, high word */ #define AS_STATUS(as) (MMU_AS(as) + 0x28) /* (RO) Status flags for address space n */ -/* Additional Bifrost AS regsiters */ +/* Additional Bifrost AS registers */ #define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) /* (RW) Translation table configuration for address space n, low word */ #define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) /* (RW) Translation table configuration for address space n, high word */ #define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) /* (RO) Secondary fault address for address space n, low word */ @@ -314,6 +317,8 @@ #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) +#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) + #define gpu_write(dev, reg, data) writel(data, dev->iomem + reg) #define gpu_read(dev, reg) readl(dev->iomem + reg)
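Both interrupt paths above (job and MMU) are converted from devm_request_irq() to devm_request_threaded_irq(): the hard handler only samples the status register, masks the block and returns IRQ_WAKE_THREAD, and the threaded handler does the real processing (it is allowed to sleep, which the new reset synchronization relies on) before unmasking on the way out. A minimal sketch of that pattern, assuming a hypothetical device with INT_STAT/INT_MASK registers; demo_dev and the register offsets are illustrative, not Panfrost's layout:

#include <linux/interrupt.h>
#include <linux/io.h>

struct demo_dev {
	void __iomem *iomem;
};

#define INT_STAT 0x00	/* raw pending sources */
#define INT_MASK 0x04	/* 0 = all masked */

static irqreturn_t demo_irq_handler(int irq, void *data)
{
	struct demo_dev *demo = data;

	if (!readl(demo->iomem + INT_STAT))
		return IRQ_NONE;	/* shared line, not ours */

	/* Mask the block and defer the heavy lifting to the thread. */
	writel(0, demo->iomem + INT_MASK);
	return IRQ_WAKE_THREAD;
}

static irqreturn_t demo_irq_handler_thread(int irq, void *data)
{
	struct demo_dev *demo = data;

	/* ... handle the pending sources, may sleep here ... */

	writel(~0, demo->iomem + INT_MASK);	/* unmask again */
	return IRQ_HANDLED;
}

/* In probe():
 *	err = devm_request_threaded_irq(dev, irq, demo_irq_handler,
 *					demo_irq_handler_thread, IRQF_SHARED,
 *					KBUILD_MODNAME, demo);
 */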
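The rewritten lock_region() encodes the region handed to AS_LOCKADDR as a naturally aligned power-of-two block: the low six bits carry log2 of the block size minus 1 (floored at AS_LOCK_REGION_MIN_SIZE, i.e. 32 KiB), and the upper bits carry the start address with the ignored low bits masked off. The smallest such block covering [start, start + size) falls out of the highest bit that differs between the first and last byte address. A user-space sketch of the same arithmetic, assuming size > 0; fls64_sketch() and encode_lock_region() are illustrative names:

#include <stdint.h>
#include <stdio.h>

#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15)

/* Like the kernel's fls64(): 1-based index of the highest set bit. */
static int fls64_sketch(uint64_t x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

static uint64_t encode_lock_region(uint64_t region_start, uint64_t size)
{
	uint64_t region_end = region_start + size;
	int region_width;

	/* Highest differing bit between first and last address, with a
	 * floor of log2(AS_LOCK_REGION_MIN_SIZE) = 15, minus 1. */
	region_width = fls64_sketch(region_start ^ (region_end - 1));
	if (region_width < 15)
		region_width = 15;
	region_width--;

	/* The hardware ignores the low bits of the start address. */
	region_start &= ~0ULL << region_width;

	return region_start | region_width;
}

int main(void)
{
	/* A 4 KiB flush still locks the 32 KiB minimum region. */
	uint64_t r = encode_lock_region(0x8000000ULL, 4096);

	printf("AS_LOCKADDR = %#llx, size = 2^%llu\n",
	       (unsigned long long)r,
	       (unsigned long long)((r & 0x3f) + 1));
	return 0;
}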
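panfrost_mmu_as_get() above gives each panfrost_mmu context at most one of the few hardware address spaces: a context that is still resident just takes another reference and moves to the front of the LRU list; otherwise a free AS is claimed from as_alloc_mask, and when none is free the least recently used context with no in-flight jobs is evicted. A toy single-threaded model of that policy; as_ctx, as_get() and as_put() are illustrative, and the real driver additionally serializes on as_lock, tracks residency in a list and re-enables ASes flagged in as_faulty_mask:

#include <stdio.h>

#define NUM_AS 4

struct as_ctx {
	int as;			/* hardware slot, -1 when not resident */
	int refcount;		/* in-flight jobs pinning the slot */
	unsigned long lru;	/* last-use stamp, smaller = older */
};

static struct as_ctx *slots[NUM_AS];
static unsigned long stamp;

static int as_get(struct as_ctx *ctx)
{
	int i, victim = -1;

	ctx->lru = ++stamp;
	if (ctx->as >= 0) {		/* fast path: still resident */
		ctx->refcount++;
		return ctx->as;
	}

	for (i = 0; i < NUM_AS; i++) {	/* free slot available? */
		if (!slots[i]) {
			victim = i;
			break;
		}
	}

	if (victim < 0) {		/* evict the oldest idle context */
		for (i = 0; i < NUM_AS; i++)
			if (!slots[i]->refcount &&
			    (victim < 0 || slots[i]->lru < slots[victim]->lru))
				victim = i;
		if (victim < 0)
			return -1;	/* every AS has active jobs */
		slots[victim]->as = -1;
	}

	slots[victim] = ctx;
	ctx->as = victim;
	ctx->refcount = 1;
	return victim;
}

static void as_put(struct as_ctx *ctx)
{
	ctx->refcount--;
}

int main(void)
{
	struct as_ctx a = { .as = -1 }, b = { .as = -1 };

	printf("a -> AS%d, b -> AS%d\n", as_get(&a), as_get(&b));
	as_put(&a);
	as_put(&b);
	return 0;
}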
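mmu_map_sg() walks each DMA segment and asks get_pgsize() for the largest page that fits at the current position: a 2 MiB entry is only used when the combined alignment of the GPU and CPU addresses (iova | paddr) is 2 MiB aligned and at least 2 MiB of the segment remains, otherwise it falls back to 4 KiB. A small sketch of that walk; pick_pgsize() mirrors get_pgsize() and the addresses in main() are arbitrary:

#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000ULL
#define SZ_2M 0x200000ULL

static uint64_t pick_pgsize(uint64_t addr, uint64_t remaining)
{
	if ((addr & (SZ_2M - 1)) || remaining < SZ_2M)
		return SZ_4K;
	return SZ_2M;
}

int main(void)
{
	/* One 4 KiB step up to the 2 MiB boundary, two 2 MiB entries,
	 * then a 4 KiB tail. */
	uint64_t iova = 0x1ff000, len = 0x402000;

	while (len) {
		uint64_t pg = pick_pgsize(iova, len);

		printf("map %#llx + %#llx\n",
		       (unsigned long long)iova, (unsigned long long)pg);
		iova += pg;
		len -= pg;
	}
	return 0;
}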
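panfrost_drm_mm_color_adjust() trims the hole that drm_mm offers for an executable mapping (color without PANFROST_BO_NOEXEC) so the range never starts or ends exactly on a 4 GiB boundary, never starts within the last 16 MiB of a 4 GiB segment (such a start is pushed past the boundary), and is clamped so it stays inside a single segment. The arithmetic is done in page frame numbers, as in the driver. A standalone sketch of the same adjustments; adjust_exec_range() and align_up() are illustrative names:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PFN_4G (1ULL << (32 - PAGE_SHIFT))
#define PFN_4G_MASK (PFN_4G - 1)
#define PFN_16M (1ULL << (24 - PAGE_SHIFT))

/* Round up to a power-of-two multiple, like the kernel's ALIGN(). */
static uint64_t align_up(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);
}

static void adjust_exec_range(uint64_t *start, uint64_t *end)
{
	uint64_t next_seg;

	if ((*start & PFN_4G_MASK) == 0)
		(*start)++;
	if ((*end & PFN_4G_MASK) == 0)
		(*end)--;

	/* A start in the last 16 MiB of a segment jumps past the boundary. */
	next_seg = align_up(*start, PFN_4G);
	if (next_seg - *start <= PFN_16M)
		*start = next_seg + 1;

	/* Never let the range cross into the next 4 GiB segment. */
	next_seg = align_up(*start, PFN_4G) - 1;
	if (*end > next_seg)
		*end = next_seg;
}

int main(void)
{
	uint64_t s = PFN_4G - 4, e = PFN_4G + 100;	/* straddles 4 GiB */

	adjust_exec_range(&s, &e);
	printf("usable PFNs: [%llu, %llu]\n",
	       (unsigned long long)s, (unsigned long long)e);
	return 0;
}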
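The threaded MMU handler only attempts on-demand heap growth when the fault is actually a translation fault: the low byte of AS_FAULTSTATUS is the exception code, and the handler requires (exception_type & 0xF8) == 0xC0, i.e. a code in the 0xC0-0xC7 range, together with the page-fault bit (not the bus-error bit) being the one set for that AS. Anything else is terminal: the fault is logged, the AS is recorded in as_faulty_mask and its MMU is disabled until the AS is reassigned. The decode, extracted for clarity; is_translation_fault() is an illustrative name:

#include <stdint.h>
#include <stdio.h>

/* Codes 0xC0-0xC7 cover the translation-fault levels; only those can be
 * satisfied by mapping more pages into a growable heap BO. */
static int is_translation_fault(uint32_t fault_status)
{
	uint32_t exception_type = fault_status & 0xFF;

	return (exception_type & 0xF8) == 0xC0;
}

int main(void)
{
	printf("0xC1 -> %d, 0xD8 -> %d\n",
	       is_translation_fault(0xC1), is_translation_fault(0xD8));
	return 0;
}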