/*
 *
 * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

/*
 * GPU backend instrumentation APIs.
 */

#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>

/**
 * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
 *                                 hardware
 *
 * @kbdev: Kbase device
 */
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
	unsigned long flags;
	unsigned long pm_flags;
	u32 irq_mask;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
					KBASE_INSTR_STATE_REQUEST_CLEAN);

	/* Enable interrupt */
	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

	/* Clean & invalidate the caches so we're sure the mmu tables for the
	 * dump buffer are valid */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
					GPU_COMMAND_CLEAN_INV_CACHES, NULL);

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

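/**
 * kbase_instr_hwcnt_enable_internal - Enable HW counter collection for a
 *                                     context
 * @kbdev: Kbase device
 * @kctx:  Kbase context that will own the dump
 * @setup: HW counter setup parameters (dump buffer address and per-block
 *         enable bitmaps)
 *
 * Requests the shader cores and L2 caches, cleans the caches so the mmu
 * tables for the dump buffer are valid, then programs the performance
 * counter block with the dump address and enable bitmaps.
 *
 * Return: 0 on success, -EINVAL if the dump buffer is missing or not
 * 2048-byte aligned, or if instrumentation is already enabled.
 */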
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
					struct kbase_context *kctx,
					struct kbase_uk_hwcnt_setup *setup)
{
	unsigned long flags, pm_flags;
	int err = -EINVAL;
	u32 irq_mask;
	int ret;
	u64 shader_cores_needed;
	u32 prfcnt_config;

	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
							KBASE_PM_CORE_SHADER);

	/* The dump buffer must be supplied and 2048-byte aligned */
	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
		goto out_err;

	/* Override core availability policy to ensure all cores are
	 * available */
	kbase_pm_ca_instr_enable(kbdev);

	/* Request the cores early on synchronously - we'll release them on any
	 * errors (e.g. instrumentation already active) */
	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
		/* Instrumentation is already enabled */
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		goto out_unrequest_cores;
	}

	/* Enable interrupt */
	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

	/* In use, this context is the owner */
	kbdev->hwcnt.kctx = kctx;
	/* Remember the dump address so we can reprogram it later */
	kbdev->hwcnt.addr = setup->dump_buffer;

	/* Request the clean */
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
	kbdev->hwcnt.backend.triggered = 0;

	/* Clean & invalidate the caches so we're sure the mmu tables for the
	 * dump buffer are valid */
	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
					&kbdev->hwcnt.backend.cache_clean_work);
	KBASE_DEBUG_ASSERT(ret);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	/* Wait for cacheclean to complete */
	wait_event(kbdev->hwcnt.backend.wait,
					kbdev->hwcnt.backend.triggered != 0);

	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
							KBASE_INSTR_STATE_IDLE);

	kbase_pm_request_l2_caches(kbdev);

	/* Configure */
	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;

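	/*
	 * When the driver is built with CONFIG_MALI_PRFCNT_SET_SECONDARY,
	 * select the secondary performance counter set through the SETSELECT
	 * field of PRFCNT_CONFIG. Only GPUs using the new (arch v6) product ID
	 * format support this selection.
	 */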
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
	{
		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);

		if (arch_v6)
			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
	}
#endif

	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);

	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
					setup->dump_buffer & 0xFFFFFFFF, kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
					setup->dump_buffer >> 32, kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
					setup->jm_bm, kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
					setup->shader_bm, kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
					setup->mmu_l2_bm, kctx);

	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
	 * HW counter dump. */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
									kctx);
	else
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
							setup->tiler_bm, kctx);

	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);

	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
	 */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
							setup->tiler_bm, kctx);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	kbdev->hwcnt.backend.triggered = 1;
	wake_up(&kbdev->hwcnt.backend.wait);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	err = 0;

	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
	return err;
 out_unrequest_cores:
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 out_err:
	return err;
}

int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
	unsigned long flags, pm_flags;
	int err = -EINVAL;
	u32 irq_mask;
	struct kbase_device *kbdev = kctx->kbdev;

	while (1) {
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
			/* Instrumentation is not enabled */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.kctx != kctx) {
			/* Instrumentation has been set up for another
			 * context */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
			break;

		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

		/* Ongoing dump/setup - wait for its completion */
		wait_event(kbdev->hwcnt.backend.wait,
					kbdev->hwcnt.backend.triggered != 0);
	}

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
	kbdev->hwcnt.backend.triggered = 0;

	/* Disable interrupt */
	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);

	/* Disable the counters */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);

	kbdev->hwcnt.kctx = NULL;
	kbdev->hwcnt.addr = 0ULL;

	kbase_pm_ca_instr_disable(kbdev);

	kbase_pm_unrequest_cores(kbdev, true,
		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));

	kbase_pm_release_l2_caches(kbdev);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
									kctx);

	err = 0;

 out:
	return err;
}

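/**
 * kbase_instr_hwcnt_request_dump - Trigger a manual HW counter dump
 * @kctx: Kbase context that owns the HW counter setup
 *
 * Reprograms the dump address and issues GPU_COMMAND_PRFCNT_SAMPLE. The dump
 * completes asynchronously; completion can be waited for with
 * kbase_instr_hwcnt_wait_for_dump() or polled with
 * kbase_instr_hwcnt_dump_complete().
 *
 * Return: 0 if the dump was requested, -EINVAL if @kctx does not own the
 * counters or the backend is not idle.
 */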
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
	unsigned long flags;
	int err = -EINVAL;
	struct kbase_device *kbdev = kctx->kbdev;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.kctx != kctx) {
		/* The instrumentation has been set up for another context */
		goto unlock;
	}

	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
		/* HW counters are disabled or another dump is ongoing, or
		 * we're resetting */
		goto unlock;
	}

	kbdev->hwcnt.backend.triggered = 0;

	/* Mark that we're dumping - the PF handler can signal that we faulted
	 */
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;

	/* Reconfigure the dump address */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
					kbdev->hwcnt.addr >> 32, NULL);

	/* Start dumping */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
					kbdev->hwcnt.addr, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
					GPU_COMMAND_PRFCNT_SAMPLE, kctx);

	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);

	err = 0;

 unlock:
	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);

bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
						bool * const success)
{
	unsigned long flags;
	bool complete = false;
	struct kbase_device *kbdev = kctx->kbdev;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
		*success = true;
		complete = true;
	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
		*success = false;
		complete = true;
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);

void kbasep_cache_clean_worker(struct work_struct *data)
{
	struct kbase_device *kbdev;
	unsigned long flags;

	kbdev = container_of(data, struct kbase_device,
						hwcnt.backend.cache_clean_work);

	mutex_lock(&kbdev->cacheclean_lock);
	kbasep_instr_hwcnt_cacheclean(kbdev);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	/* Wait for our condition, and any reset to complete */
	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
				kbdev->hwcnt.backend.state !=
						KBASE_INSTR_STATE_CLEANING);
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	}
	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
					KBASE_INSTR_STATE_CLEANED);

	/* All finished and idle */
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	kbdev->hwcnt.backend.triggered = 1;
	wake_up(&kbdev->hwcnt.backend.wait);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	mutex_unlock(&kbdev->cacheclean_lock);
}

void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
		kbdev->hwcnt.backend.triggered = 1;
		wake_up(&kbdev->hwcnt.backend.wait);
	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
		int ret;
		/* Always clean and invalidate the cache after a successful
		 * dump */
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
					&kbdev->hwcnt.backend.cache_clean_work);
		KBASE_DEBUG_ASSERT(ret);
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

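/**
 * kbase_clean_caches_done - Signal completion of a GPU cache clean to the
 *                           instrumentation backend
 * @kbdev: Kbase device
 *
 * Masks the CLEAN_CACHES_COMPLETED interrupt again and, if the backend is in
 * the CLEANING state, moves it to CLEANED and wakes
 * kbasep_cache_clean_worker().
 */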
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
	u32 irq_mask;

	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
		unsigned long flags;
		unsigned long pm_flags;

		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
		/* Disable interrupt */
		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
									NULL);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

		/* Wakeup... */
		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
			/* Only wake if we weren't resetting */
			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
		}

		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	}
}

int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	unsigned long flags;
	int err;

	/* Wait for dump & cacheclean to complete */
	wait_event(kbdev->hwcnt.backend.wait,
					kbdev->hwcnt.backend.triggered != 0);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
		err = -EINVAL;
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	} else {
		/* Dump done */
		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
							KBASE_INSTR_STATE_IDLE);
		err = 0;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	return err;
}

int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
	unsigned long flags;
	int err = -EINVAL;
	struct kbase_device *kbdev = kctx->kbdev;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	/* Check it's the context previously set up and we're not already
	 * dumping */
	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
							KBASE_INSTR_STATE_IDLE)
		goto out;

	/* Clear the counters */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
						GPU_COMMAND_PRFCNT_CLEAR, kctx);

	err = 0;

 out:
	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);

int kbase_instr_backend_init(struct kbase_device *kbdev)
{
	int ret = 0;

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;

	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
						kbasep_cache_clean_worker);
	kbdev->hwcnt.backend.triggered = 0;

	kbdev->hwcnt.backend.cache_clean_wq =
			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
	if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
		ret = -EINVAL;

	return ret;
}

void kbase_instr_backend_term(struct kbase_device *kbdev)
{
	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}