/*
 *
 * (C) COPYRIGHT ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

/**
 * @file mali_kbase_jm.c
 * Base kernel job manager APIs
 */

#include
#include
#include
#include
#include
#include "mali_kbase_jm.h"

#define beenthere(kctx, f, a...)  KBASE_LOG(1, kctx->kbdev->dev, "%s:" f, __func__, ##a)

#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
#endif

static void kbasep_try_reset_gpu_early(kbase_device *kbdev);

#ifdef CONFIG_GPU_TRACEPOINTS
static char *kbasep_make_job_slot_string(int js, char *js_string)
{
	sprintf(js_string, "job_slot_%i", js);
	return js_string;
}
#endif

static void kbase_job_hw_submit(kbase_device *kbdev, kbase_jd_atom *katom, int js)
{
	kbase_context *kctx;
	u32 cfg;
	u64 jc_head = katom->jc;

	KBASE_DEBUG_ASSERT(kbdev);
	KBASE_DEBUG_ASSERT(katom);

	kctx = katom->kctx;

	/* Command register must be available */
	KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
	/* Affinity must not be violated */
	kbase_js_debug_log_current_affinities(kbdev);
	KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, katom->affinity));

	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), jc_head & 0xFFFFFFFF, kctx);
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), jc_head >> 32, kctx);

#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
	{
		u64 mask;
		u32 value;

		if (0 == js) {
			mask = mali_js0_affinity_mask;
		} else if (1 == js) {
			mask = mali_js1_affinity_mask;
		} else {
			mask = mali_js2_affinity_mask;
		}

		value = katom->affinity & (mask & 0xFFFFFFFF);
		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_LO), value, kctx);

		value = (katom->affinity >> 32) & ((mask >> 32) & 0xFFFFFFFF);
		kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_HI), value, kctx);
	}
#else
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_LO), katom->affinity & 0xFFFFFFFF, kctx);
	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_HI), katom->affinity >> 32, kctx);
#endif

	/* start MMU, medium priority, cache clean/flush on end, clean/flush on start */
	cfg = kctx->as_nr | JSn_CONFIG_END_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_START_MMU | JSn_CONFIG_START_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_THREAD_PRI(8);

	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
		if (!kbdev->jm_slots[js].job_chain_flag) {
			cfg |= JSn_CONFIG_JOB_CHAIN_FLAG;
			katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
			kbdev->jm_slots[js].job_chain_flag = MALI_TRUE;
		} else {
			katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
			kbdev->jm_slots[js].job_chain_flag = MALI_FALSE;
		}
	}

	kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_CONFIG_NEXT), cfg, kctx);

	/* Write an approximate start timestamp.
	 * It's approximate because there might be a job in the HEAD register. In
	 * such cases, we'll try to make a better approximation in the IRQ handler
	 * (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
	katom->start_timestamp = ktime_get();

	/* GO !
*/ KBASE_LOG(2, kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", katom, kctx, js, jc_head, katom->affinity); KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, (u32) katom->affinity); #ifdef CONFIG_MALI_GATOR_SUPPORT kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), kctx, kbase_jd_atom_id(kctx, katom)); #endif /* CONFIG_MALI_GATOR_SUPPORT */ #ifdef CONFIG_GPU_TRACEPOINTS if (kbasep_jm_nr_jobs_submitted(&kbdev->jm_slots[js]) == 1) { /* If this is the only job on the slot, trace it as starting */ char js_string[16]; trace_gpu_sched_switch(kbasep_make_job_slot_string(js, js_string), ktime_to_ns(katom->start_timestamp), (u32)katom->kctx, 0, katom->work_id); kbdev->jm_slots[js].last_context = katom->kctx; } #endif kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_START, katom->kctx); } void kbase_job_submit_nolock(kbase_device *kbdev, kbase_jd_atom *katom, int js) { kbase_jm_slot *jm_slots; KBASE_DEBUG_ASSERT(kbdev); jm_slots = kbdev->jm_slots; /* * We can have: * - one job already done (pending interrupt), * - one running, * - one ready to be run. * Hence a maximum of 3 inflight jobs. We have a 4 job * queue, which I hope will be enough... */ kbasep_jm_enqueue_submit_slot(&jm_slots[js], katom); kbase_job_hw_submit(kbdev, katom, js); } void kbase_job_done_slot(kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp) { kbase_jm_slot *slot; kbase_jd_atom *katom; mali_addr64 jc_head; kbase_context *kctx; KBASE_DEBUG_ASSERT(kbdev); if (completion_code != BASE_JD_EVENT_DONE && completion_code != BASE_JD_EVENT_STOPPED) dev_err(kbdev->dev, "t6xx: GPU fault 0x%02lx from job slot %d\n", (unsigned long)completion_code, s); /* IMPORTANT: this function must only contain work necessary to complete a * job from a Real IRQ (and not 'fake' completion, e.g. from * Soft-stop). For general work that must happen no matter how the job was * removed from the hardware, place it in kbase_jd_done() */ slot = &kbdev->jm_slots[s]; katom = kbasep_jm_dequeue_submit_slot(slot); /* If the katom completed is because it's a dummy job for HW workarounds, then take no further action */ if (kbasep_jm_is_dummy_workaround_job(kbdev, katom)) { KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, NULL, NULL, 0, s, completion_code); return; } jc_head = katom->jc; kctx = katom->kctx; KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, jc_head, s, completion_code); if (completion_code != BASE_JD_EVENT_DONE && completion_code != BASE_JD_EVENT_STOPPED) { #if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 KBASE_TRACE_DUMP(kbdev); #endif } if (job_tail != 0) { mali_bool was_updated = (job_tail != jc_head); /* Some of the job has been executed, so we update the job chain address to where we should resume from */ katom->jc = job_tail; if (was_updated) KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, kctx, katom, job_tail, s); } /* Only update the event code for jobs that weren't cancelled */ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) katom->event_code = (base_jd_event_code) completion_code; kbase_device_trace_register_access(kctx, REG_WRITE, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 1 << s); /* Complete the job, and start new ones * * Also defer remaining work onto the workqueue: * - Re-queue Soft-stopped jobs * - For any other jobs, queue the job back into the dependency system * - Schedule out the parent context if necessary, and schedule a new one in. 
*/ #ifdef CONFIG_GPU_TRACEPOINTS if (kbasep_jm_nr_jobs_submitted(slot) != 0) { kbase_jd_atom *katom; char js_string[16]; katom = kbasep_jm_peek_idx_submit_slot(slot, 0); /* The atom in the HEAD */ trace_gpu_sched_switch(kbasep_make_job_slot_string(s, js_string), ktime_to_ns(*end_timestamp), (u32)katom->kctx, 0, katom->work_id); slot->last_context = katom->kctx; } else { char js_string[16]; trace_gpu_sched_switch(kbasep_make_job_slot_string(s, js_string), ktime_to_ns(ktime_get()), 0, 0, 0); slot->last_context = 0; } #endif kbase_jd_done(katom, s, end_timestamp, KBASE_JS_ATOM_DONE_START_NEW_ATOMS); } /** * Update the start_timestamp of the job currently in the HEAD, based on the * fact that we got an IRQ for the previous set of completed jobs. * * The estimate also takes into account the KBASE_JS_IRQ_THROTTLE_TIME_US and * the time the job was submitted, to work out the best estimate (which might * still result in an over-estimate to the calculated time spent) */ STATIC void kbasep_job_slot_update_head_start_timestamp(kbase_device *kbdev, kbase_jm_slot *slot, ktime_t end_timestamp) { KBASE_DEBUG_ASSERT(slot); if (kbasep_jm_nr_jobs_submitted(slot) > 0) { kbase_jd_atom *katom; ktime_t new_timestamp; ktime_t timestamp_diff; katom = kbasep_jm_peek_idx_submit_slot(slot, 0); /* The atom in the HEAD */ KBASE_DEBUG_ASSERT(katom != NULL); if (kbasep_jm_is_dummy_workaround_job(kbdev, katom) != MALI_FALSE) { /* Don't access the members of HW workaround 'dummy' jobs */ return; } /* Account for any IRQ Throttle time - makes an overestimate of the time spent by the job */ new_timestamp = ktime_sub_ns(end_timestamp, KBASE_JS_IRQ_THROTTLE_TIME_US * 1000); timestamp_diff = ktime_sub(new_timestamp, katom->start_timestamp); if (ktime_to_ns(timestamp_diff) >= 0) { /* Only update the timestamp if it's a better estimate than what's currently stored. 
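			 *
			 * A worked example of the estimate (the numbers are purely illustrative,
			 * they are not taken from this driver): if the IRQ is handled at
			 * end_timestamp = 1000000 ns and KBASE_JS_IRQ_THROTTLE_TIME_US were 20,
			 * the candidate start time is 1000000 - 20 * 1000 = 980000 ns; it only
			 * replaces katom->start_timestamp when timestamp_diff is non-negative,
			 * i.e. when it is later than the estimate recorded at submit time.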
			 * This is because our estimate that accounts for the throttle time may be too much
			 * of an overestimate */
			katom->start_timestamp = new_timestamp;
		}
	}
}

void kbase_job_done(kbase_device *kbdev, u32 done)
{
	unsigned long flags;
	int i;
	u32 count = 0;
	ktime_t end_timestamp = ktime_get();
	kbasep_js_device_data *js_devdata;

	KBASE_DEBUG_ASSERT(kbdev);

	js_devdata = &kbdev->js_data;

	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);

	memset(&kbdev->slot_submit_count_irq[0], 0, sizeof(kbdev->slot_submit_count_irq));

	/* write irq throttle register, this will prevent irqs from occurring until
	 * the given number of gpu clock cycles have passed */
	{
		int irq_throttle_cycles = atomic_read(&kbdev->irq_throttle_cycles);
		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), irq_throttle_cycles, NULL);
	}

	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);

	while (done) {
		kbase_jm_slot *slot;
		u32 failed = done >> 16;

		/* treat failed slots as finished slots */
		u32 finished = (done & 0xFFFF) | failed;

		/* Note: This is inherently unfair, as we always check
		 * for lower numbered interrupts before the higher
		 * numbered ones. */
		i = ffs(finished) - 1;
		KBASE_DEBUG_ASSERT(i >= 0);

		slot = &kbdev->jm_slots[i];

		do {
			int nr_done;
			u32 active;
			u32 completion_code = BASE_JD_EVENT_DONE;	/* assume OK */
			u64 job_tail = 0;

			if (failed & (1u << i)) {
				/* read out the job slot status code if the job slot reported failure */
				completion_code = kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_STATUS), NULL);

				switch (completion_code) {
				case BASE_JD_EVENT_STOPPED:
#ifdef CONFIG_MALI_GATOR_SUPPORT
					kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_SOFT_STOPPED, i), NULL, 0);
#endif				/* CONFIG_MALI_GATOR_SUPPORT */
					/* Soft-stopped job - read the value of JS_TAIL so that the job chain can be resumed */
					job_tail = (u64) kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_TAIL_LO), NULL) | ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_TAIL_HI), NULL) << 32);
					break;
				case BASE_JD_EVENT_NOT_STARTED:
					/* PRLAM-10673 can cause a TERMINATED job to come back as NOT_STARTED, but the error interrupt helps us detect it */
					completion_code = BASE_JD_EVENT_TERMINATED;
					/* fall through */
				default:
					dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", i, completion_code, kbase_exception_name(completion_code));
				}
			}

			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done & ((1 << i) | (1 << (i + 16))), NULL);
			active = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL);

			if (((active >> i) & 1) == 0 && (((done >> (i + 16)) & 1) == 0)) {
				/* There is a potential race we must work around:
				 *
				 * 1. A job slot has a job in both current and next registers
				 * 2. The job in current completes successfully, the IRQ handler reads RAWSTAT
				 *    and calls this function with the relevant bit set in "done"
				 * 3. The job in the next registers becomes the current job on the GPU
				 * 4. Sometime before the JOB_IRQ_CLEAR line above the job on the GPU _fails_
				 * 5. The IRQ_CLEAR clears the done bit but not the failed bit. This atomically sets
				 *    JOB_IRQ_JS_STATE. However since both jobs have now completed the relevant bits
				 *    for the slot are set to 0.
				 *
				 * If we now did nothing then we'd incorrectly assume that _both_ jobs had completed
				 * successfully (since we haven't yet observed the fail bit being set in RAWSTAT).
				 *
				 * So at this point if there are no active jobs left we check to see if RAWSTAT has a failure
				 * bit set for the job slot. If it does we know that there has been a new failure that we
				 * didn't previously know about, so we make sure that we record this in active (but we wait
				 * for the next loop to deal with it).
				 *
				 * If we were handling a job failure (i.e. done has the relevant high bit set) then we know that
				 * the value read back from JOB_IRQ_JS_STATE is the correct number of remaining jobs because
				 * the failed job will have prevented any further jobs from starting execution. */
				u32 rawstat = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);

				if ((rawstat >> (i + 16)) & 1) {
					/* There is a failed job that we've missed - add it back to active */
					active |= (1u << i);
				}
			}

			KBASE_LOG(2, kbdev->dev, "Job ended with status 0x%08X\n", completion_code);

			nr_done = kbasep_jm_nr_jobs_submitted(slot);
			nr_done -= (active >> i) & 1;
			nr_done -= (active >> (i + 16)) & 1;

			if (nr_done <= 0) {
				dev_warn(kbdev->dev, "Spurious interrupt on slot %d", i);
				goto spurious;
			}

			count += nr_done;

			while (nr_done) {
				if (nr_done == 1) {
					kbase_job_done_slot(kbdev, i, completion_code, job_tail, &end_timestamp);
				} else {
					/* More than one job has completed. Since this is not the last job being reported this time it
					 * must have passed. This is because the hardware will not allow further jobs in a job slot to
					 * complete until the failed job is cleared from the IRQ status. */
					kbase_job_done_slot(kbdev, i, BASE_JD_EVENT_DONE, 0, &end_timestamp);
				}
				nr_done--;
			}

 spurious:
			done = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);

			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
				/* Workaround for missing interrupt caused by PRLAM-10883 */
				if (((active >> i) & 1) && (0 == kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_STATUS), NULL))) {
					/* Force job slot to be processed again */
					done |= (1u << i);
				}
			}

			failed = done >> 16;
			finished = (done & 0xFFFF) | failed;
		} while (finished & (1 << i));

		kbasep_job_slot_update_head_start_timestamp(kbdev, slot, end_timestamp);
	}
	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);

	if (atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_COMMITTED) {
		/* If we're trying to reset the GPU then we might be able to do it early
		 * (without waiting for a timeout) because some jobs have completed */
		kbasep_try_reset_gpu_early(kbdev);
	}

	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
}
KBASE_EXPORT_TEST_API(kbase_job_done)

static mali_bool kbasep_soft_stop_allowed(kbase_device *kbdev, u16 core_reqs)
{
	mali_bool soft_stops_allowed = MALI_TRUE;

	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
		if ((core_reqs & BASE_JD_REQ_T) != 0)
			soft_stops_allowed = MALI_FALSE;
	}
	return soft_stops_allowed;
}

static mali_bool kbasep_hard_stop_allowed(kbase_device *kbdev, u16 core_reqs)
{
	mali_bool hard_stops_allowed = MALI_TRUE;

	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
		if ((core_reqs & BASE_JD_REQ_T) != 0)
			hard_stops_allowed = MALI_FALSE;
	}
	return hard_stops_allowed;
}

static void kbasep_job_slot_soft_or_hard_stop_do_action(kbase_device *kbdev, int js, u32 action, u16 core_reqs, kbase_jd_atom *target_katom)
{
	kbase_context *kctx = target_katom->kctx;
#if KBASE_TRACE_ENABLE
	u32 status_reg_before;
	u64 job_in_head_before;
	u32 status_reg_after;

	/* Check the head pointer */
	job_in_head_before = ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_LO), NULL)) | (((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_HI), NULL)) << 32);
	status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_STATUS), NULL);
#endif

	if (action == JSn_COMMAND_SOFT_STOP) {
		mali_bool soft_stop_allowed =
kbasep_soft_stop_allowed(kbdev, core_reqs); if (!soft_stop_allowed) { #ifdef CONFIG_MALI_DEBUG KBASE_LOG(2, kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", (unsigned int)core_reqs); #endif /* CONFIG_MALI_DEBUG */ return; } /* We are about to issue a soft stop, so mark the atom as having been soft stopped */ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; } if (action == JSn_COMMAND_HARD_STOP) { mali_bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, core_reqs); if (!hard_stop_allowed) { /* Jobs can be hard-stopped for the following reasons: * * CFS decides the job has been running too long (and soft-stop has not occurred). * In this case the GPU will be reset by CFS if the job remains on the GPU. * * * The context is destroyed, kbase_jd_zap_context will attempt to hard-stop the job. However * it also has a watchdog which will cause the GPU to be reset if the job remains on the GPU. * * * An (unhandled) MMU fault occurred. As long as BASE_HW_ISSUE_8245 is defined then * the GPU will be reset. * * All three cases result in the GPU being reset if the hard-stop fails, * so it is safe to just return and ignore the hard-stop request. */ dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", (unsigned int)core_reqs); return; } } if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && action == JSn_COMMAND_SOFT_STOP) { int i; kbase_jm_slot *slot; slot = &kbdev->jm_slots[js]; for (i = 0; i < kbasep_jm_nr_jobs_submitted(slot); i++) { kbase_jd_atom *katom; katom = kbasep_jm_peek_idx_submit_slot(slot, i); KBASE_DEBUG_ASSERT(katom); if (kbasep_jm_is_dummy_workaround_job(kbdev, katom) != MALI_FALSE) { /* Don't access the members of HW workaround 'dummy' jobs * * This assumes that such jobs can't cause HW_ISSUE_8316, and could only be blocked * by other jobs causing HW_ISSUE_8316 (which will get poked/or eventually get killed) */ continue; } /* For HW_ISSUE_8316, only 'bad' jobs attacking the system can * cause this issue: normally, all memory should be allocated in * multiples of 4 pages, and growable memory should be changed size * in multiples of 4 pages. * * Whilst such 'bad' jobs can be cleared by a GPU reset, the * locking up of a uTLB entry caused by the bad job could also * stall other ASs, meaning that other ASs' jobs don't complete in * the 'grace' period before the reset. We don't want to lose other * ASs' jobs when they would normally complete fine, so we must * 'poke' the MMU regularly to help other ASs complete */ kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, katom); } } if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { if (action == JSn_COMMAND_SOFT_STOP) action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? JSn_COMMAND_SOFT_STOP_1: JSn_COMMAND_SOFT_STOP_0; else action = (target_katom->atom_flags & KBASE_KATOM_FLAGS_JOBCHAIN) ? 
JSn_COMMAND_HARD_STOP_1: JSn_COMMAND_HARD_STOP_0; } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND), action, kctx); #if KBASE_TRACE_ENABLE status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_STATUS), NULL); if (status_reg_after == BASE_JD_EVENT_ACTIVE) { kbase_jm_slot *slot; kbase_jd_atom *head; kbase_context *head_kctx; slot = &kbdev->jm_slots[js]; head = kbasep_jm_peek_idx_submit_slot(slot, slot->submitted_nr - 1); head_kctx = head->kctx; /* We don't need to check kbasep_jm_is_dummy_workaround_job( head ) here: * - Members are not indirected through * - The members will all be zero anyway */ if (status_reg_before == BASE_JD_EVENT_ACTIVE) KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); else KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); switch(action) { case JSn_COMMAND_SOFT_STOP: KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); break; case JSn_COMMAND_SOFT_STOP_0: KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); break; case JSn_COMMAND_SOFT_STOP_1: KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); break; case JSn_COMMAND_HARD_STOP: KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); break; case JSn_COMMAND_HARD_STOP_0: KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); break; case JSn_COMMAND_HARD_STOP_1: KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); break; default: BUG(); break; } } else { if (status_reg_before == BASE_JD_EVENT_ACTIVE) KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); else KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); switch(action) { case JSn_COMMAND_SOFT_STOP: KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js); break; case JSn_COMMAND_SOFT_STOP_0: KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js); break; case JSn_COMMAND_SOFT_STOP_1: KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js); break; case JSn_COMMAND_HARD_STOP: KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js); break; case JSn_COMMAND_HARD_STOP_0: KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js); break; case JSn_COMMAND_HARD_STOP_1: KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); break; default: BUG(); break; } } #endif } /* Helper macros used by kbasep_job_slot_soft_or_hard_stop */ #define JM_SLOT_MAX_JOB_SUBMIT_REGS 2 #define JM_JOB_IS_CURRENT_JOB_INDEX(n) (1 == n) /* Index of the last job to process */ #define JM_JOB_IS_NEXT_JOB_INDEX(n) (2 == n) /* Index of the prior to last job to process */ /** Soft or hard-stop a slot * * This function safely ensures that the correct job is either hard or soft-stopped. * It deals with evicting jobs from the next registers where appropriate. * * This does not attempt to stop or evict jobs that are 'dummy' jobs for HW workarounds. 
 *
 * @param kbdev The kbase device
 * @param kctx The context to soft/hard-stop job(s) from (or NULL if all jobs should be targeted)
 * @param js The slot that the job(s) are on
 * @param target_katom The atom that should be targeted (or NULL if all jobs from the context should be targeted)
 * @param action The action to perform, either JSn_COMMAND_HARD_STOP or JSn_COMMAND_SOFT_STOP
 */
static void kbasep_job_slot_soft_or_hard_stop(kbase_device *kbdev, kbase_context *kctx, int js, kbase_jd_atom *target_katom, u32 action)
{
	kbase_jd_atom *katom;
	u8 i;
	u8 jobs_submitted;
	kbase_jm_slot *slot;
	u16 core_reqs;
	kbasep_js_device_data *js_devdata;
	mali_bool can_safely_stop = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION);

	KBASE_DEBUG_ASSERT(action == JSn_COMMAND_HARD_STOP || action == JSn_COMMAND_SOFT_STOP);
	KBASE_DEBUG_ASSERT(kbdev);
	js_devdata = &kbdev->js_data;

	slot = &kbdev->jm_slots[js];
	KBASE_DEBUG_ASSERT(slot);
	lockdep_assert_held(&js_devdata->runpool_irq.lock);

	jobs_submitted = kbasep_jm_nr_jobs_submitted(slot);

	KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 1);
	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SLOT_SOFT_OR_HARD_STOP, kctx, NULL, 0u, js, jobs_submitted);

	if (jobs_submitted > JM_SLOT_MAX_JOB_SUBMIT_REGS)
		i = jobs_submitted - JM_SLOT_MAX_JOB_SUBMIT_REGS;
	else
		i = 0;

	/* Loop through all jobs that have been submitted to the slot and haven't completed */
	for (; i < jobs_submitted; i++) {
		katom = kbasep_jm_peek_idx_submit_slot(slot, i);

		if (kctx && katom->kctx != kctx)
			continue;
		if (target_katom && katom != target_katom)
			continue;
		if (kbasep_jm_is_dummy_workaround_job(kbdev, katom))
			continue;

		core_reqs = katom->core_req;

		if (JM_JOB_IS_CURRENT_JOB_INDEX(jobs_submitted - i)) {
			/* The last job in the slot, check if there is a job in the next register */
			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), NULL) == 0)
				kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, action, core_reqs, katom);
			else {
				/* The job is in the next registers */
				beenthere(kctx, "clearing job from next registers on slot %d", js);
				kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_NOP, NULL);

				/* Check to see if we did remove a job from the next registers */
				if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), NULL) != 0) {
					/* The job was successfully cleared from the next registers, requeue it */
					kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot);
					KBASE_DEBUG_ASSERT(dequeued_katom == katom);
					jobs_submitted--;

					/* Set the next registers to NULL */
					kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), 0, NULL);
					kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), 0, NULL);

					/* As the job is removed from the next registers we undo the associated
					 * update to the job_chain_flag for the job slot.
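					 *
					 * To make the pairing this flag preserves concrete (this simply restates
					 * the submit path in kbase_job_hw_submit() above, it is not new behaviour):
					 *
					 *   submit A: job_chain_flag clear -> A gets JSn_CONFIG_JOB_CHAIN_FLAG and
					 *             KBASE_KATOM_FLAGS_JOBCHAIN, and the flag becomes set
					 *   submit B: flag set -> B gets neither, and the flag becomes clear
					 *   stop A:   KBASE_KATOM_FLAGS_JOBCHAIN set   -> *_SOFT/HARD_STOP_1 command
					 *   stop B:   KBASE_KATOM_FLAGS_JOBCHAIN clear -> *_SOFT/HARD_STOP_0 command
					 *
					 * Having evicted the job from the NEXT registers, the flag therefore has
					 * to be toggled back, which is what the line below does.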
					 */
					if (can_safely_stop)
						slot->job_chain_flag = !slot->job_chain_flag;

					KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js);

					/* Complete the job, indicate it took no time, but don't submit any more at this point */
					kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT);
				} else {
					/* The job transitioned into the current registers before we managed to evict it,
					 * in this case we fall back to soft/hard-stopping the job */
					beenthere(kctx, "missed job in next register, soft/hard-stopping slot %d", js);
					kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, action, core_reqs, katom);
				}
			}
		} else if (JM_JOB_IS_NEXT_JOB_INDEX(jobs_submitted - i)) {
			/* There's a job after this one, check to see if that job is in the next registers.
			 * If so, we need to pay attention not to accidentally stop that one when issuing
			 * the command to stop the one pointed to by the head registers (as the one in the head
			 * may finish in the meantime and the one in the next moves to the head). Either the hardware
			 * has support for this using job chain disambiguation or we need to evict the job
			 * from the next registers first to ensure we can safely stop the one pointed to by
			 * the head registers. */
			if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), NULL) != 0) {
				kbase_jd_atom *check_next_atom;
				/* It is - we should remove that job and soft/hard-stop the slot */

				/* Only proceed when the next job isn't a HW workaround 'dummy' job
				 *
				 * This can't be an ASSERT due to MMU fault code:
				 * - This first hard-stops the job that caused the fault
				 * - Under HW Issue 8245, it will then reset the GPU
				 * - This causes a Soft-stop to occur on all slots
				 * - By the time of the soft-stop, we may (depending on timing) still have:
				 *   - The original job in HEAD, if it's not finished the hard-stop
				 *   - The dummy workaround job in NEXT
				 *
				 * Other cases could be coded in future that cause back-to-back Soft/Hard
				 * stops with dummy workaround jobs in place, e.g. MMU handler code and Job
				 * Scheduler watchdog timer running in parallel.
* * Note, the index i+1 is valid to peek from: i == jobs_submitted-2, therefore * i+1 == jobs_submitted-1 */ check_next_atom = kbasep_jm_peek_idx_submit_slot(slot, i + 1); if (kbasep_jm_is_dummy_workaround_job(kbdev, check_next_atom) != MALI_FALSE) continue; if (!can_safely_stop) { beenthere(kctx, "clearing job from next registers on slot %d", js); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_NOP, NULL); /* Check to see if we did remove a job from the next registers */ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), NULL) != 0 || kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), NULL) != 0) { /* We did remove a job from the next registers, requeue it */ kbase_jd_atom *dequeued_katom = kbasep_jm_dequeue_tail_submit_slot(slot); KBASE_DEBUG_ASSERT(dequeued_katom != NULL); jobs_submitted--; /* Set the next registers to NULL */ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), 0, NULL); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), 0, NULL); KBASE_TRACE_ADD_SLOT(kbdev, JM_SLOT_EVICT, dequeued_katom->kctx, dequeued_katom, dequeued_katom->jc, js); /* Complete the job, indicate it took no time, but don't submit any more at this point */ kbase_jd_done(dequeued_katom, js, NULL, KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT); } else { /* We missed the job, that means the job we're interested in left the hardware before * we managed to do anything, so we can proceed to the next job */ continue; } } /* Next is now free, so we can soft/hard-stop the slot */ beenthere(kctx, "soft/hard-stopped slot %d (there was a job in next which was successfully cleared)\n", js); kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, action, core_reqs, katom); } /* If there was no job in the next registers, then the job we were * interested in has finished, so we need not take any action */ } } KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, 0); } void kbase_job_kill_jobs_from_context(kbase_context *kctx) { unsigned long flags; kbase_device *kbdev; kbasep_js_device_data *js_devdata; int i; KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); js_devdata = &kbdev->js_data; /* Cancel any remaining running jobs for this kctx */ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); /* Invalidate all jobs in context, to prevent re-submitting */ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_CANCELLED; for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_hardstop(kctx, i, NULL); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } void kbase_job_zap_context(kbase_context *kctx) { kbase_device *kbdev; kbasep_js_device_data *js_devdata; kbasep_js_kctx_info *js_kctx_info; int i; mali_bool evict_success; KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; /* * Critical assumption: No more submission is possible outside of the * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) * whilst the kbase_context is terminating. 
*/ /* First, atomically do the following: * - mark the context as dying * - try to evict it from the policy queue */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); js_kctx_info->ctx.is_dying = MALI_TRUE; KBASE_LOG(1, kbdev->dev, "Zap: Try Evict Ctx %p", kctx); mutex_lock(&js_devdata->queue_mutex); evict_success = kbasep_js_policy_try_evict_ctx(&js_devdata->policy, kctx); mutex_unlock(&js_devdata->queue_mutex); /* * At this point we know: * - If eviction succeeded, it was in the policy queue, but now no longer is * - We must cancel the jobs here. No Power Manager active reference to * release. * - This happens asynchronously - kbase_jd_zap_context() will wait for * those jobs to be killed. * - If eviction failed, then it wasn't in the policy queue. It is one of * the following: * - a. it didn't have any jobs, and so is not in the Policy Queue or the * Run Pool (not scheduled) * - Hence, no more work required to cancel jobs. No Power Manager active * reference to release. * - b. it was in the middle of a scheduling transaction (and thus must * have at least 1 job). This can happen from a syscall or a kernel thread. * We still hold the jsctx_mutex, and so the thread must be waiting inside * kbasep_js_try_schedule_head_ctx(), before checking whether the runpool * is full. That thread will continue after we drop the mutex, and will * notice the context is dying. It will rollback the transaction, killing * all jobs at the same time. kbase_jd_zap_context() will wait for those * jobs to be killed. * - Hence, no more work required to cancel jobs, or to release the Power * Manager active reference. * - c. it is scheduled, and may or may not be running jobs * - We must cause it to leave the runpool by stopping it from submitting * any more jobs. When it finally does leave, * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs * (because it is dying), release the Power Manager active reference, and * will not requeue the context in the policy queue. kbase_jd_zap_context() * will wait for those jobs to be killed. * - Hence, work required just to make it leave the runpool. Cancelling * jobs and releasing the Power manager active reference will be handled * when it leaves the runpool. */ if (evict_success != MALI_FALSE || js_kctx_info->ctx.is_scheduled == MALI_FALSE) { /* The following events require us to kill off remaining jobs and * update PM book-keeping: * - we evicted it correctly (it must have jobs to be in the Policy Queue) * * These events need no action, but take this path anyway: * - Case a: it didn't have any jobs, and was never in the Queue * - Case b: scheduling transaction will be partially rolled-back (this * already cancels the jobs) */ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, js_kctx_info->ctx.is_scheduled); KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p evict_success=%d, scheduled=%d", kctx, evict_success, js_kctx_info->ctx.is_scheduled); if (evict_success != MALI_FALSE) { /* Only cancel jobs when we evicted from the policy queue. No Power * Manager active reference was held. 
* * Having is_dying set ensures that this kills, and doesn't requeue */ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, MALI_FALSE); } mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); } else { unsigned long flags; mali_bool was_retained; /* Case c: didn't evict, but it is scheduled - it's in the Run Pool */ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, js_kctx_info->ctx.is_scheduled); KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); /* Disable the ctx from submitting any more jobs */ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbasep_js_clear_submit_allowed(js_devdata, kctx); /* Retain and (later) release the context whilst it is is now disallowed from submitting * jobs - ensures that someone somewhere will be removing the context later on */ was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); /* Since it's scheduled and we have the jsctx_mutex, it must be retained successfully */ KBASE_DEBUG_ASSERT(was_retained != MALI_FALSE); KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); /* Cancel any remaining running jobs for this kctx - if any. Submit is disallowed * which takes effect immediately, so no more new jobs will appear after we do this. */ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_hardstop(kctx, i, NULL); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); KBASE_LOG(2, kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", kctx); kbasep_js_runpool_release_ctx(kbdev, kctx); } KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); /* After this, you must wait on both the kbase_jd_context::zero_jobs_wait * and the kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the * jobs to be destroyed, and the context to be de-scheduled (if it was on * the runpool). * * kbase_jd_zap_context() will do this. */ } KBASE_EXPORT_TEST_API(kbase_job_zap_context) mali_error kbase_job_slot_init(kbase_device *kbdev) { int i; KBASE_DEBUG_ASSERT(kbdev); for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbasep_jm_init_submit_slot(&kbdev->jm_slots[i]); return MALI_ERROR_NONE; } KBASE_EXPORT_TEST_API(kbase_job_slot_init) void kbase_job_slot_halt(kbase_device *kbdev) { CSTD_UNUSED(kbdev); } void kbase_job_slot_term(kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); } KBASE_EXPORT_TEST_API(kbase_job_slot_term) /** * Soft-stop the specified job slot * * The job slot lock must be held when calling this function. * The job slot must not already be in the process of being soft-stopped. * * Where possible any job in the next register is evicted before the soft-stop. * * @param kbdev The kbase device * @param js The job slot to soft-stop * @param target_katom The job that should be soft-stopped (or NULL for any job) */ void kbase_job_slot_softstop(kbase_device *kbdev, int js, kbase_jd_atom *target_katom) { kbasep_job_slot_soft_or_hard_stop(kbdev, NULL, js, target_katom, JSn_COMMAND_SOFT_STOP); } /** * Hard-stop the specified job slot * * The job slot lock must be held when calling this function. 
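 *
 * An illustrative call pattern (it mirrors kbase_job_kill_jobs_from_context()
 * earlier in this file; the local variable names are only for the example):
 *
 *	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
 *	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
 *		kbase_job_slot_hardstop(kctx, i, NULL);
 *	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);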
 *
 * @param kctx The kbase context that contains the job(s) that should
 *             be hard-stopped
 * @param js The job slot to hard-stop
 * @param target_katom The job that should be hard-stopped (or NULL for all
 *                     jobs from the context)
 */
void kbase_job_slot_hardstop(kbase_context *kctx, int js, kbase_jd_atom *target_katom)
{
	kbase_device *kbdev = kctx->kbdev;

	kbasep_job_slot_soft_or_hard_stop(kbdev, kctx, js, target_katom, JSn_COMMAND_HARD_STOP);

	if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) || kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) || (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_T76X_3542) && (target_katom == NULL || target_katom->core_req & BASE_JD_REQ_FS_AFBC))) {
		/* MIDBASE-2916: if a fragment job with AFBC encoding is
		 * hard-stopped, also perform a soft reset in order to
		 * clear the GPU status.
		 * Workaround for HW issue 8401 has an issue, so after
		 * hard-stopping just reset the GPU. This will ensure that the
		 * jobs leave the GPU. */
		if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
			dev_err(kbdev->dev, "Issuing GPU soft-reset after hard stopping due to hardware issue");
			kbase_reset_gpu_locked(kbdev);
		}
	}
}

void kbase_debug_dump_registers(kbase_device *kbdev)
{
	int i;

	dev_err(kbdev->dev, "Register state:");
	dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
	dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x", kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
	for (i = 0; i < 3; i++) {
		dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_STATUS), NULL), i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JSn_HEAD_LO), NULL));
	}
	dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
	dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL), kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
	dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL), kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
	dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL), kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
}

void kbasep_reset_timeout_worker(struct work_struct *data)
{
	unsigned long flags;
	kbase_device *kbdev;
	int i;
	ktime_t end_timestamp = ktime_get();
	kbasep_js_device_data *js_devdata;
	kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
	kbase_instr_state bckp_state;

	KBASE_DEBUG_ASSERT(data);

	kbdev = container_of(data, kbase_device, reset_work);

	KBASE_DEBUG_ASSERT(kbdev);
	js_devdata = &kbdev->js_data;

	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);

	/* Make sure the timer has completed - this cannot be done from interrupt context,
	 * so this cannot be done within kbasep_try_reset_gpu_early. */
	hrtimer_cancel(&kbdev->reset_timer);

	if (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
		/* This would re-activate the GPU. Since it's already idle, there's no
		 * need to reset it */
		atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
		wake_up(&kbdev->reset_wait);
		return;
	}

	mutex_lock(&kbdev->pm.lock);

	/* We hold the pm lock, so there ought to be a current policy */
	KBASE_DEBUG_ASSERT(kbdev->pm.pm_current_policy);

	/* All slots have been soft-stopped and we've waited SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point
	 * we assume that anything that is still left on the GPU is stuck there and we'll kill it when we reset the GPU */
	dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.state == KBASE_INSTR_STATE_RESETTING) {	/* the same interrupt handler preempted itself */
		/* GPU is being reset */
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		wait_event(kbdev->hwcnt.wait, kbdev->hwcnt.triggered != 0);
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	}

	/* Save the HW counters setup */
	if (kbdev->hwcnt.kctx != NULL) {
		kbase_context *kctx = kbdev->hwcnt.kctx;
		hwcnt_setup.dump_buffer = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) & 0xffffffff;
		hwcnt_setup.dump_buffer |= (mali_addr64) kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) << 32;
		hwcnt_setup.jm_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
		hwcnt_setup.shader_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
		hwcnt_setup.tiler_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
		hwcnt_setup.l3_cache_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), kctx);
		hwcnt_setup.mmu_l2_bm = kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
	}

	/* Output the state of some interesting registers to help in the
	 * debugging of GPU resets */
	kbase_debug_dump_registers(kbdev);

	bckp_state = kbdev->hwcnt.state;
	kbdev->hwcnt.state = KBASE_INSTR_STATE_RESETTING;
	kbdev->hwcnt.triggered = 0;

	/* Disable IRQ to prevent IRQ handlers from kicking in after releasing the spinlock;
	 * this also clears any outstanding interrupts */
	kbase_pm_disable_interrupts(kbdev);

	/* Ensure that any IRQ handlers have finished */
	kbase_synchronize_irqs(kbdev);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	/* Reset the GPU */
	kbase_pm_init_hw(kbdev, MALI_TRUE);
	/* IRQs were re-enabled by kbase_pm_init_hw, and GPU is still powered */

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	/* Restore the HW counters setup */
	if (kbdev->hwcnt.kctx != NULL) {
		kbase_context *kctx = kbdev->hwcnt.kctx;
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_OFF, kctx);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), hwcnt_setup.dump_buffer >> 32, kctx);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), hwcnt_setup.jm_bm, kctx);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), hwcnt_setup.shader_bm, kctx);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_L3_CACHE_EN), hwcnt_setup.l3_cache_bm, kctx);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), hwcnt_setup.mmu_l2_bm, kctx);

		/* Due to PRLAM-8186 we need to disable the Tiler before we enable the HW counter dump.
*/ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, kctx); else kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), hwcnt_setup.tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | PRFCNT_CONFIG_MODE_MANUAL, kctx); /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), hwcnt_setup.tiler_bm, kctx); } kbdev->hwcnt.state = bckp_state; switch(kbdev->hwcnt.state) { /* Cases for waking kbasep_cache_clean_worker worker */ case KBASE_INSTR_STATE_CLEANED: /* Cache-clean IRQ occurred, but we reset: * Wakeup incase the waiter saw RESETTING */ case KBASE_INSTR_STATE_REQUEST_CLEAN: /* After a clean was requested, but before the regs were written: * Wakeup incase the waiter saw RESETTING */ wake_up(&kbdev->hwcnt.cache_clean_wait); break; case KBASE_INSTR_STATE_CLEANING: /* Either: * 1) We've not got the Cache-clean IRQ yet: it was lost, or: * 2) We got it whilst resetting: it was voluntarily lost * * So, move to the next state and wakeup: */ kbdev->hwcnt.state = KBASE_INSTR_STATE_CLEANED; wake_up(&kbdev->hwcnt.cache_clean_wait); break; /* Cases for waking anyone else */ case KBASE_INSTR_STATE_DUMPING: /* If dumping, abort the dump, because we may've lost the IRQ */ kbdev->hwcnt.state = KBASE_INSTR_STATE_IDLE; kbdev->hwcnt.triggered = 1; wake_up(&kbdev->hwcnt.wait); break; case KBASE_INSTR_STATE_DISABLED: case KBASE_INSTR_STATE_IDLE: case KBASE_INSTR_STATE_FAULT: /* Every other reason: wakeup in that state */ kbdev->hwcnt.triggered = 1; wake_up(&kbdev->hwcnt.wait); break; /* Unhandled cases */ case KBASE_INSTR_STATE_RESETTING: default: BUG(); break; } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); /* Complete any jobs that were still on the GPU */ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { int nr_done; kbase_jm_slot *slot = &kbdev->jm_slots[i]; nr_done = kbasep_jm_nr_jobs_submitted(slot); while (nr_done) { dev_err(kbdev->dev, "Job stuck in slot %d on the GPU was cancelled", i); kbase_job_done_slot(kbdev, i, BASE_JD_EVENT_JOB_CANCELLED, 0, &end_timestamp); nr_done--; } } spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_lock(&js_devdata->runpool_mutex); /* Reprogram the GPU's MMU */ for (i = 0; i < BASE_MAX_NR_AS; i++) { if (js_devdata->runpool_irq.per_as_data[i].kctx) { kbase_as *as = &kbdev->as[i]; mutex_lock(&as->transaction_mutex); kbase_mmu_update(js_devdata->runpool_irq.per_as_data[i].kctx); mutex_unlock(&as->transaction_mutex); } } atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING); wake_up(&kbdev->reset_wait); dev_err(kbdev->dev, "Reset complete"); /* Find out what cores are required now */ kbase_pm_update_cores_state(kbdev); /* Synchronously request and wait for those cores, because if * instrumentation is enabled it would need them immediately. 
*/ kbase_pm_check_transitions_sync(kbdev); /* Try submitting some jobs to restart processing */ if (js_devdata->nr_user_contexts_running > 0) { KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbasep_js_try_run_next_job_nolock(kbdev); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&kbdev->pm.lock); kbase_pm_context_idle(kbdev); KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); } enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) { kbase_device *kbdev = container_of(timer, kbase_device, reset_timer); KBASE_DEBUG_ASSERT(kbdev); /* Reset still pending? */ if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == KBASE_RESET_GPU_COMMITTED) queue_work(kbdev->reset_workq, &kbdev->reset_work); return HRTIMER_NORESTART; } /* * If all jobs are evicted from the GPU then we can reset the GPU * immediately instead of waiting for the timeout to elapse */ static void kbasep_try_reset_gpu_early_locked(kbase_device *kbdev) { int i; int pending_jobs = 0; KBASE_DEBUG_ASSERT(kbdev); /* Count the number of jobs */ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { kbase_jm_slot *slot = &kbdev->jm_slots[i]; pending_jobs += kbasep_jm_nr_jobs_submitted(slot); } if (pending_jobs > 0) { /* There are still jobs on the GPU - wait */ return; } /* Check that the reset has been committed to (i.e. kbase_reset_gpu has been called), and that no other * thread beat this thread to starting the reset */ if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != KBASE_RESET_GPU_COMMITTED) { /* Reset has already occurred */ return; } queue_work(kbdev->reset_workq, &kbdev->reset_work); } static void kbasep_try_reset_gpu_early(kbase_device *kbdev) { unsigned long flags; kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); kbasep_try_reset_gpu_early_locked(kbdev); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } /* * Prepare for resetting the GPU. * This function just soft-stops all the slots to ensure that as many jobs as possible are saved. * * The function returns a boolean which should be interpreted as follows: * - MALI_TRUE - Prepared for reset, kbase_reset_gpu should be called. * - MALI_FALSE - Another thread is performing a reset, kbase_reset_gpu should not be called. * * @return See description */ mali_bool kbase_prepare_to_reset_gpu_locked(kbase_device *kbdev) { int i; KBASE_DEBUG_ASSERT(kbdev); if (atomic_cmpxchg(&kbdev->reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_PREPARED) != KBASE_RESET_GPU_NOT_PENDING) { /* Some other thread is already resetting the GPU */ return MALI_FALSE; } for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_softstop(kbdev, i, NULL); return MALI_TRUE; } mali_bool kbase_prepare_to_reset_gpu(kbase_device *kbdev) { unsigned long flags; mali_bool ret; kbasep_js_device_data *js_devdata; js_devdata = &kbdev->js_data; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); ret = kbase_prepare_to_reset_gpu_locked(kbdev); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return ret; } KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu) /* * This function should be called after kbase_prepare_to_reset_gpu iff it returns MALI_TRUE. * It should never be called without a corresponding call to kbase_prepare_to_reset_gpu. 
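 *
 * A minimal usage sketch of that contract (illustrative only; the one caller
 * in this file, kbase_job_slot_hardstop(), uses the *_locked variants instead):
 *
 *	if (kbase_prepare_to_reset_gpu(kbdev))
 *		kbase_reset_gpu(kbdev);
 *
 * after which the caller waits for the reset wait queue (described below) to
 * be signalled before touching the GPU again.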
 *
 * After this function is called (or not called if kbase_prepare_to_reset_gpu returned MALI_FALSE),
 * the caller should wait for kbdev->reset_wait to be signalled to know when the reset has completed.
 */
void kbase_reset_gpu(kbase_device *kbdev)
{
	u32 timeout_ms;

	KBASE_DEBUG_ASSERT(kbdev);

	/* Note this is an assert/atomic_set because it is a software issue for a race to be occurring here */
	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_PREPARED);
	atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED);

	timeout_ms = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS);
	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", timeout_ms);
	hrtimer_start(&kbdev->reset_timer, HR_TIMER_DELAY_MSEC(timeout_ms), HRTIMER_MODE_REL);

	/* Try resetting early */
	kbasep_try_reset_gpu_early(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_reset_gpu)

void kbase_reset_gpu_locked(kbase_device *kbdev)
{
	u32 timeout_ms;

	KBASE_DEBUG_ASSERT(kbdev);

	/* Note this is an assert/atomic_set because it is a software issue for a race to be occurring here */
	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->reset_gpu) == KBASE_RESET_GPU_PREPARED);
	atomic_set(&kbdev->reset_gpu, KBASE_RESET_GPU_COMMITTED);

	timeout_ms = kbasep_get_config_value(kbdev, kbdev->config_attributes, KBASE_CONFIG_ATTR_JS_RESET_TIMEOUT_MS);
	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n", timeout_ms);
	hrtimer_start(&kbdev->reset_timer, HR_TIMER_DELAY_MSEC(timeout_ms), HRTIMER_MODE_REL);

	/* Try resetting early */
	kbasep_try_reset_gpu_early_locked(kbdev);
}