diff options
Diffstat (limited to 'drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c')
-rwxr-xr-x | drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c | 422 |
1 files changed, 422 insertions, 0 deletions
diff --git a/drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c b/drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c new file mode 100755 index 00000000000..bf42e41668b --- /dev/null +++ b/drivers/gpu/arm/t6xx/kbase/src/common/mali_kbase_8401_workaround.c @@ -0,0 +1,422 @@ +/* + * + * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_8401_workaround.c + * Functions related to working around BASE_HW_ISSUE_8401 + */ + +#include <kbase/src/common/mali_kbase.h> +#include <kbase/src/common/mali_kbase_defs.h> +#include <kbase/src/common/mali_kbase_jm.h> +#include <kbase/src/common/mali_kbase_8401_workaround.h> + +#define WORKAROUND_PAGE_OFFSET (2) +#define URT_POINTER_INDEX (20) +#define RMU_POINTER_INDEX (23) +#define RSD_POINTER_INDEX (24) +#define TSD_POINTER_INDEX (31) + +static const u32 compute_job_32bit_header[] = { + /* Job Descriptor Header */ + + /* Job Status */ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + /* Flags and Indices */ + /* job_type = compute shader job */ + 0x00000008, 0x00000000, + /* Pointer to next job */ + 0x00000000, + /* Reserved */ + 0x00000000, + /* Job Dimension Data */ + 0x0000000f, 0x21040842, + /* Task Split */ + 0x08000000, + /* Reserved */ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + + /* Draw Call Descriptor - 32 bit (Must be aligned to a 64-byte boundry) */ + + /* Flags */ + 0x00000004, + /* Primary Attribute Offset */ + 0x00000000, + /* Primitive Index Base Value */ + 0x00000000, + + /* Pointer To Vertex Position Array (64-byte alignment) */ + 0x00000000, + /* Pointer To Uniform Remapping Table (8-byte alignment) */ + 0, + /* Pointer To Image Descriptor Pointer Table */ + 0x00000000, + /* Pointer To Sampler Array */ + 0x00000000, + /* Pointer To Register-Mapped Uniform Data Area (16-byte alignment) */ + 0, + /* Pointer To Renderer State Descriptor (64-byte alignment) */ + 0, + /* Pointer To Primary Attribute Buffer Array */ + 0x00000000, + /* Pointer To Primary Attribute Array */ + 0x00000000, + /* Pointer To Secondary Attribute Buffer Array */ + 0x00000000, + /* Pointer To Secondary Attribute Array */ + 0x00000000, + /* Pointer To Viewport Descriptor */ + 0x00000000, + /* Pointer To Occlusion Query Result */ + 0x00000000, + /* Pointer To Thread Storage (64 byte alignment) */ + 0, +}; + +static const u32 compute_job_32bit_urt[] = { + /* Uniform Remapping Table Entry */ + 0, 0, +}; + +static const u32 compute_job_32bit_rmu[] = { + /* Register Mapped Uniform Data Area (16 byte aligned), an array of 128-bit + * register values. + * + * NOTE: this is also used as the URT pointer, so the first 16-byte entry + * must be all zeros. + * + * For BASE_HW_ISSUE_8987, we place 16 RMUs here, because this should only + * be run concurrently with other GLES jobs (i.e. FS jobs from slot 0). + */ + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; + +static const u32 compute_job_32bit_rsd[] = { + /* Renderer State Descriptor */ + + /* Shader program inital PC (low) */ + 0x00000001, + /* Shader program initial PC (high) */ + 0x00000000, + /* Image descriptor array sizes */ + 0x00000000, + /* Attribute array sizes */ + 0x00000000, + /* Uniform array size and Shader Flags */ + /* Flags set: R, D, SE, Reg Uniforms==16, FPM==OpenCL */ + 0x42003800, + /* Depth bias */ + 0x00000000, + /* Depth slope bias */ + 0x00000000, + /* Depth bias clamp */ + 0x00000000, + /* Multisample Write Mask and Flags */ + 0x00000000, + /* Stencil Write Masks and Alpha parameters */ + 0x00000000, + /* Stencil tests - forward facing */ + 0x00000000, + /* Stencel tests - back facing */ + 0x00000000, + /* Alpha Test Reference Value */ + 0x00000000, + /* Thread Balancing Information */ + 0x00000000, + /* Blend Parameters or Pointer (low) */ + 0x00000000, + /* Blend Parameters or Pointer (high) */ + 0x00000000, +}; + +static const u32 compute_job_32bit_tsd[] = { + /* Thread Storage Descriptor */ + + /* Thread Local Storage Sizes */ + 0x00000000, + /* Workgroup Local Memory Area Flags */ + 0x0000001f, + /* Pointer to Local Storage Area */ + 0x00021000, 0x00000001, + /* Pointer to Workgroup Local Storage Area */ + 0x00000000, 0x00000000, + /* Pointer to Shader Exception Handler */ + 0x00000000, 0x00000000 +}; + +static kbase_jd_atom dummy_job_atom[KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT]; + +/** + * Initialize the compute job sturcture. + */ + +static void kbasep_8401_workaround_update_job_pointers(u32 *dummy_compute_job, int page_nr) +{ + u32 base_address = (page_nr + WORKAROUND_PAGE_OFFSET) * PAGE_SIZE; + u8 *dummy_job = (u8 *) dummy_compute_job; + u8 *dummy_job_urt; + u8 *dummy_job_rmu; + u8 *dummy_job_rsd; + u8 *dummy_job_tsd; + + KBASE_DEBUG_ASSERT(dummy_compute_job); + + /* determin where each job section goes taking alignment restrictions into consideration */ + dummy_job_urt = (u8 *) ((((uintptr_t) dummy_job + sizeof(compute_job_32bit_header)) + 7) & ~7); + dummy_job_rmu = (u8 *) ((((uintptr_t) dummy_job_urt + sizeof(compute_job_32bit_urt)) + 15) & ~15); + dummy_job_rsd = (u8 *) ((((uintptr_t) dummy_job_rmu + sizeof(compute_job_32bit_rmu)) + 63) & ~63); + dummy_job_tsd = (u8 *) ((((uintptr_t) dummy_job_rsd + sizeof(compute_job_32bit_rsd)) + 63) & ~63); + + /* Make sure the job fits within a single page */ + KBASE_DEBUG_ASSERT(PAGE_SIZE > ((dummy_job_tsd + sizeof(compute_job_32bit_tsd)) - dummy_job)); + + /* Copy the job sections to the allocated memory */ + memcpy(dummy_job, compute_job_32bit_header, sizeof(compute_job_32bit_header)); + memcpy(dummy_job_urt, compute_job_32bit_urt, sizeof(compute_job_32bit_urt)); + memcpy(dummy_job_rmu, compute_job_32bit_rmu, sizeof(compute_job_32bit_rmu)); + memcpy(dummy_job_rsd, compute_job_32bit_rsd, sizeof(compute_job_32bit_rsd)); + memcpy(dummy_job_tsd, compute_job_32bit_tsd, sizeof(compute_job_32bit_tsd)); + + /* Update header pointers */ + *(dummy_compute_job + URT_POINTER_INDEX) = (dummy_job_urt - dummy_job) + base_address; + *(dummy_compute_job + RMU_POINTER_INDEX) = (dummy_job_rmu - dummy_job) + base_address; + *(dummy_compute_job + RSD_POINTER_INDEX) = (dummy_job_rsd - dummy_job) + base_address; + *(dummy_compute_job + TSD_POINTER_INDEX) = (dummy_job_tsd - dummy_job) + base_address; + /* Update URT pointer */ + *((u32 *) dummy_job_urt + 0) = (((dummy_job_rmu - dummy_job) + base_address) << 8) & 0xffffff00; + *((u32 *) dummy_job_urt + 1) = (((dummy_job_rmu - dummy_job) + base_address) >> 24) & 0xff; +} + +/** + * Initialize the memory for 8401 workaround. + */ + +mali_error kbasep_8401_workaround_init(kbase_device * const kbdev) +{ + kbasep_js_device_data *js_devdata; + kbase_context *workaround_kctx; + int i; + u16 as_present_mask; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kbdev->workaround_kctx == NULL); + + js_devdata = &kbdev->js_data; + + /* For this workaround we reserve one address space to allow us to + * submit a special job independent of other contexts */ + --(kbdev->nr_hw_address_spaces); + + /* Only update nr_user_address_spaces if it was unchanged - to ensure + * HW workarounds that have modified this will still work */ + if (kbdev->nr_user_address_spaces == (kbdev->nr_hw_address_spaces + 1)) + --(kbdev->nr_user_address_spaces); + + KBASE_DEBUG_ASSERT(kbdev->nr_user_address_spaces <= kbdev->nr_hw_address_spaces); + + /* Recalculate the free address spaces bit-pattern */ + as_present_mask = (1U << kbdev->nr_hw_address_spaces) - 1; + js_devdata->as_free &= as_present_mask; + + workaround_kctx = kbase_create_context(kbdev); + if (!workaround_kctx) + return MALI_ERROR_FUNCTION_FAILED; + + /* Allocate the pages required to contain the job */ + if (MALI_ERROR_NONE != kbase_mem_allocator_alloc(&workaround_kctx->osalloc, KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT, kbdev->workaround_compute_job_pa, 0)) + goto no_pages; + + /* Get virtual address of mapped memory and write a compute job for each page */ + for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) { + kbdev->workaround_compute_job_va[i] = kmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i]))); + if (NULL == kbdev->workaround_compute_job_va[i]) + goto page_free; + + /* Generate the compute job data */ + kbasep_8401_workaround_update_job_pointers((u32 *) kbdev->workaround_compute_job_va[i], i); + } + + /* Insert pages to the gpu mmu. */ + kbase_gpu_vm_lock(workaround_kctx); + + kbase_mmu_insert_pages(workaround_kctx, + /* vpfn = page number */ + (u64) WORKAROUND_PAGE_OFFSET, + /* physical address */ + kbdev->workaround_compute_job_pa, + /* number of pages */ + KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT, + /* flags */ + KBASE_REG_GPU_RD | KBASE_REG_CPU_RD | KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + + kbase_gpu_vm_unlock(workaround_kctx); + + kbdev->workaround_kctx = workaround_kctx; + return MALI_ERROR_NONE; + page_free: + while (i--) + kunmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i]))); + + kbase_mem_allocator_free(&workaround_kctx->osalloc, KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT, kbdev->workaround_compute_job_pa, MALI_TRUE); + no_pages: + kbase_destroy_context(workaround_kctx); + + return MALI_ERROR_FUNCTION_FAILED; +} + +/** + * Free up the memory used by 8401 workaround. + **/ + +void kbasep_8401_workaround_term(kbase_device *kbdev) +{ + kbasep_js_device_data *js_devdata; + int i; + u16 restored_as; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kbdev->workaround_kctx); + + js_devdata = &kbdev->js_data; + + for (i = 0; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) + kunmap(pfn_to_page(PFN_DOWN(kbdev->workaround_compute_job_pa[i]))); + + kbase_mem_allocator_free(&kbdev->workaround_kctx->osalloc, KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT, kbdev->workaround_compute_job_pa, MALI_TRUE); + + kbase_destroy_context(kbdev->workaround_kctx); + kbdev->workaround_kctx = NULL; + + /* Free up the workaround address space */ + kbdev->nr_hw_address_spaces++; + + if (kbdev->nr_user_address_spaces == (kbdev->nr_hw_address_spaces - 1)) { + /* Only update nr_user_address_spaces if it was unchanged - to ensure + * HW workarounds that have modified this will still work */ + ++(kbdev->nr_user_address_spaces); + } + KBASE_DEBUG_ASSERT(kbdev->nr_user_address_spaces <= kbdev->nr_hw_address_spaces); + + /* Recalculate the free address spaces bit-pattern */ + restored_as = (1U << kbdev->nr_hw_address_spaces); + js_devdata->as_free |= restored_as; +} + +/** + * Submit the 8401 workaround job. + * + * Important for BASE_HW_ISSUE_8987: This job always uses 16 RMUs + * - Therefore, on slot[1] it will always use the same number of RMUs as another + * GLES job. + * - On slot[2], no other job (GLES or otherwise) will be running on the + * cores, by virtue of it being slot[2]. Therefore, any value of RMUs is + * acceptable. + */ +void kbasep_8401_submit_dummy_job(kbase_device *kbdev, int js) +{ + u32 cfg; + mali_addr64 jc; + u32 pgd_high; + + /* While this workaround is active we reserve the last address space just for submitting the dummy jobs */ + int as = kbdev->nr_hw_address_spaces; + + /* Don't issue compute jobs on job slot 0 */ + KBASE_DEBUG_ASSERT(js != 0); + KBASE_DEBUG_ASSERT(js < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT); + + /* Job chain GPU address */ + jc = (js + WORKAROUND_PAGE_OFFSET) * PAGE_SIZE; /* GPU phys address (see kbase_mmu_insert_pages call in kbasep_8401_workaround_init */ + + /* Clear the job status words which may contain values from a previous job completion */ + memset(kbdev->workaround_compute_job_va[js], 0, 4 * sizeof(u32)); + + /* Get the affinity of the previous job */ + dummy_job_atom[js].affinity = ((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_LO), NULL)) | (((u64) kbase_reg_read(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_HI), NULL)) << 32); + + /* Don't submit a compute job if the affinity was previously zero (i.e. no jobs have run yet on this slot) */ + if (!dummy_job_atom[js].affinity) + return; + + /* Ensure that our page tables are programmed into the MMU */ + kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_TRANSTAB_LO), (kbdev->workaround_kctx->pgd & ASn_TRANSTAB_ADDR_SPACE_MASK) | ASn_TRANSTAB_READ_INNER | ASn_TRANSTAB_ADRMODE_TABLE, NULL); + + /* Need to use a conditional expression to avoid "right shift count >= width of type" + * error when using an if statement - although the size_of condition is evaluated at compile + * time the unused branch is not removed until after it is type-checked and the error + * produced. + */ + pgd_high = sizeof(kbdev->workaround_kctx->pgd) > 4 ? (kbdev->workaround_kctx->pgd >> 32) : 0; + kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_TRANSTAB_HI), pgd_high, NULL); + + kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_MEMATTR_LO), ASn_MEMATTR_IMPL_DEF_CACHE_POLICY, NULL); + kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_MEMATTR_HI), ASn_MEMATTR_IMPL_DEF_CACHE_POLICY, NULL); + kbase_reg_write(kbdev, MMU_AS_REG(as, ASn_COMMAND), ASn_COMMAND_UPDATE, NULL); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_LO), jc & 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_HEAD_NEXT_HI), jc >> 32, NULL); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_LO), dummy_job_atom[js].affinity & 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_AFFINITY_NEXT_HI), dummy_job_atom[js].affinity >> 32, NULL); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on start */ + cfg = as | JSn_CONFIG_END_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_START_MMU | JSn_CONFIG_START_FLUSH_CLEAN_INVALIDATE | JSn_CONFIG_THREAD_PRI(8); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_CONFIG_NEXT), cfg, NULL); + + KBASE_TRACE_ADD_SLOT(kbdev, JM_SUBMIT, NULL, 0, jc, js); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JSn_COMMAND_NEXT), JSn_COMMAND_START, NULL); + /* Report that the job has been submitted */ + kbasep_jm_enqueue_submit_slot(&kbdev->jm_slots[js], &dummy_job_atom[js]); +} + +/** + * Check if the katom given is a dummy compute job. + */ +mali_bool kbasep_8401_is_workaround_job(kbase_jd_atom *katom) +{ + int i; + + /* Note: we don't check the first dummy_job_atom as slot 0 is never used for the workaround */ + for (i = 1; i < KBASE_8401_WORKAROUND_COMPUTEJOB_COUNT; i++) { + if (katom == &dummy_job_atom[i]) { + /* This is a dummy job */ + return MALI_TRUE; + } + } + + /* This is a real job */ + return MALI_FALSE; +} |