diff options
author | Linux Build Service Account <lnxbuild@localhost> | 2019-09-23 03:58:31 -0700 |
---|---|---|
committer | Gerrit - the friendly Code Review server <code-review@localhost> | 2019-09-23 03:58:32 -0700 |
commit | 2c69f1e502e79bbdc41ef08d374af08e48abca56 (patch) | |
tree | 1ff4d7c98157d16b31da8b72d46b5bbc90883e96 | |
parent | b9d76aa6e9dd8d5032336e5cb4210c81f8a262cc (diff) | |
parent | a8fba7d0461b7b4519528b2e2ac2e01f1b5bc6a5 (diff) |
Merge changes Ic0dedbad,Ic0dedbad,Ic0dedbad,Ic0dedbad into kernel.lnx.4.14.r3-rel
tag: LA.UM.7.9.r1-08500-sm6150.0
* changes:
msm: kgsl: Make the "scratch" global buffer use a random GPU address
msm: kgsl: Use a bitmap allocator for global addressing
msm: kgsl: Execute user profiling commands in an IB
msm: kgsl: Verify the offset of the profiling buffer
-rw-r--r-- | drivers/gpu/msm/adreno_ringbuffer.c | 58 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno_ringbuffer.h | 14 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl.h | 4 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_drawobj.c | 24 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_iommu.c | 60 |
5 files changed, 126 insertions, 34 deletions
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index d791ee485736..59635ce35592 100644 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2018, The Linux Foundation. All rights reserved. +/* Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -308,6 +308,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev, PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc"); if (ret) return ret; + + /* allocate a chunk of memory to create user profiling IB1s */ + kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc, + PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc"); + return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc, KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "ringbuffer"); @@ -322,7 +327,7 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt) if (!adreno_is_a3xx(adreno_dev)) { status = kgsl_allocate_global(device, &device->scratch, - PAGE_SIZE, 0, 0, "scratch"); + PAGE_SIZE, 0, KGSL_MEMDESC_RANDOM, "scratch"); if (status != 0) return status; } @@ -353,7 +358,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev, kgsl_free_global(device, &rb->pagetable_desc); kgsl_free_global(device, &rb->preemption_desc); - + kgsl_free_global(device, &rb->profile_desc); kgsl_free_global(device, &rb->buffer_desc); kgsl_del_event_group(&rb->events); memset(rb, 0, sizeof(struct adreno_ringbuffer)); @@ -854,6 +859,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev, return (unsigned int)(p - cmds); } +/* This is the maximum possible size for 64 bit targets */ +#define PROFILE_IB_DWORDS 4 +#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2)) + +static int 
set_user_profiling(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr) +{ + int dwords, index = 0; + u64 ib_gpuaddr; + u32 *ib; + + if (!rb->profile_desc.hostptr) + return 0; + + ib = ((u32 *) rb->profile_desc.hostptr) + + (rb->profile_index * PROFILE_IB_DWORDS); + ib_gpuaddr = rb->profile_desc.gpuaddr + + (rb->profile_index * (PROFILE_IB_DWORDS << 2)); + + dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr); + + /* Make an indirect buffer for the request */ + cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr); + cmds[index++] = dwords; + + rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS; + + return index; +} + /* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, struct kgsl_drawobj_cmd *cmdobj, @@ -954,14 +990,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, !adreno_is_a3xx(adreno_dev) && (cmdobj->profiling_buf_entry != NULL)) { user_profiling = true; - dwords += 6; /* - * REG_TO_MEM packet on A5xx and above needs another ordinal. - * Add 2 more dwords since we do profiling before and after. 
+ * User side profiling uses two IB1s, one before with 4 dwords + * per INDIRECT_BUFFER_PFE call */ - if (!ADRENO_LEGACY_PM4(adreno_dev)) - dwords += 2; + dwords += 8; /* * we want to use an adreno_submit_time struct to get the @@ -1020,11 +1054,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* - * Add cmds to read the GPU ticks at the start of command obj and + * Add IB1 to read the GPU ticks at the start of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { - cmds += _get_alwayson_counter(adreno_dev, cmds, + cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_submitted)); @@ -1072,11 +1106,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, } /* - * Add cmds to read the GPU ticks at the end of command obj and + * Add IB1 to read the GPU ticks at the end of command obj and * write it into the appropriate command obj profiling buffer offset */ if (user_profiling) { - cmds += _get_alwayson_counter(adreno_dev, cmds, + cmds += set_user_profiling(adreno_dev, rb, cmds, cmdobj->profiling_buffer_gpuaddr + offsetof(struct kgsl_drawobj_profiling_buffer, gpu_ticks_retired)); diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h index 2f3d0146740c..9eb0c92213f3 100644 --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2018, The Linux Foundation. All rights reserved. +/* Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -133,6 +133,18 @@ struct adreno_ringbuffer { int preempted_midway; spinlock_t preempt_lock; bool skip_inline_wptr; + /** + * @profile_desc: global memory to construct IB1s to do user side + * profiling + */ + struct kgsl_memdesc profile_desc; + /** + * @profile_index: Pointer to the next "slot" in profile_desc for a user + * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous + * commands per ringbuffer with user profiling enabled + * enough. + */ + u32 profile_index; }; /* Returns the current ringbuffer */ diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index dfb5050c734b..e5e071e03314 100644 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2018, The Linux Foundation. All rights reserved. +/* Copyright (c) 2008-2019, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -201,6 +201,8 @@ struct kgsl_memdesc_ops { #define KGSL_MEMDESC_CONTIG BIT(8) /* This is an instruction buffer */ #define KGSL_MEMDESC_UCODE BIT(9) +/* For global buffers, randomly assign an address from the region */ +#define KGSL_MEMDESC_RANDOM BIT(10) /** * struct kgsl_memdesc - GPU memory object descriptor diff --git a/drivers/gpu/msm/kgsl_drawobj.c b/drivers/gpu/msm/kgsl_drawobj.c index ee75a204635c..b82ea1f15503 100644 --- a/drivers/gpu/msm/kgsl_drawobj.c +++ b/drivers/gpu/msm/kgsl_drawobj.c @@ -598,13 +598,29 @@ static void add_profiling_buffer(struct kgsl_device *device, return; } - cmdobj->profiling_buf_entry = entry; - if (id != 0) + if (!id) { + cmdobj->profiling_buffer_gpuaddr = gpuaddr; + } else { + u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer); + + /* + * Make sure there is enough room in the object to store the + * 
entire profiling buffer object + */ + if (off < offset || off >= entry->memdesc.size) { + dev_err(device->dev, + "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", + drawobj->context->id, id, offset, gpuaddr, size); + kgsl_mem_entry_put(entry); + return; + } + cmdobj->profiling_buffer_gpuaddr = entry->memdesc.gpuaddr + offset; - else - cmdobj->profiling_buffer_gpuaddr = gpuaddr; + } + + cmdobj->profiling_buf_entry = entry; } /** diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c index a05abe0776db..1ad2badcf065 100644 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -20,6 +20,7 @@ #include <linux/msm_kgsl.h> #include <linux/ratelimit.h> #include <linux/of_platform.h> +#include <linux/random.h> #include <soc/qcom/scm.h> #include <soc/qcom/secure_buffer.h> #include <linux/compat.h> @@ -90,15 +91,8 @@ static struct kmem_cache *addr_entry_cache; * * Here we define an array and a simple allocator to keep track of the currently * active global entries. Each entry is assigned a unique address inside of a - * MMU implementation specific "global" region. The addresses are assigned - * sequentially and never re-used to avoid having to go back and reprogram - * existing pagetables. The entire list of active entries are mapped and - * unmapped into every new pagetable as it is created and destroyed. - * - * Because there are relatively few entries and they are defined at boot time we - * don't need to go over the top to define a dynamic allocation scheme. It will - * be less wasteful to pick a static number with a little bit of growth - * potential. + * MMU implementation specific "global" region. We use a simple bitmap based + * allocator for the region to allow for both fixed and dynamic addressing. 
*/ #define GLOBAL_PT_ENTRIES 32 @@ -108,13 +102,17 @@ struct global_pt_entry { char name[32]; }; +#define GLOBAL_MAP_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT) + static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES]; +static DECLARE_BITMAP(global_map, GLOBAL_MAP_PAGES); + static int secure_global_size; static int global_pt_count; -uint64_t global_pt_alloc; static struct kgsl_memdesc gpu_qdss_desc; static struct kgsl_memdesc gpu_qtimer_desc; static unsigned int context_bank_number; + void kgsl_print_global_pt_entries(struct seq_file *s) { int i; @@ -209,6 +207,12 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu, for (i = 0; i < global_pt_count; i++) { if (global_pt_entries[i].memdesc == memdesc) { + u64 offset = memdesc->gpuaddr - + KGSL_IOMMU_GLOBAL_MEM_BASE(mmu); + + bitmap_clear(global_map, offset >> PAGE_SHIFT, + kgsl_memdesc_footprint(memdesc) >> PAGE_SHIFT); + memdesc->gpuaddr = 0; memdesc->priv &= ~KGSL_MEMDESC_GLOBAL; global_pt_entries[i].memdesc = NULL; @@ -220,19 +224,43 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu, static void kgsl_iommu_add_global(struct kgsl_mmu *mmu, struct kgsl_memdesc *memdesc, const char *name) { + u32 bit, start = 0; + u64 size = kgsl_memdesc_footprint(memdesc); + if (memdesc->gpuaddr != 0) return; - /*Check that we can fit the global allocations */ - if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES) || - WARN_ON((global_pt_alloc + memdesc->size) >= - KGSL_IOMMU_GLOBAL_MEM_SIZE)) + if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES)) + return; + + if (WARN_ON(size > KGSL_IOMMU_GLOBAL_MEM_SIZE)) + return; + + if (memdesc->priv & KGSL_MEMDESC_RANDOM) { + u32 range = GLOBAL_MAP_PAGES - (size >> PAGE_SHIFT); + + start = get_random_int() % range; + } + + while (start >= 0) { + bit = bitmap_find_next_zero_area(global_map, GLOBAL_MAP_PAGES, + start, size >> PAGE_SHIFT, 0); + + if (bit < GLOBAL_MAP_PAGES) + break; + + start--; + } + + if (WARN_ON(start < 0)) return; - memdesc->gpuaddr = 
KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + global_pt_alloc; + memdesc->gpuaddr = + KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + (bit << PAGE_SHIFT); + + bitmap_set(global_map, bit, size >> PAGE_SHIFT); memdesc->priv |= KGSL_MEMDESC_GLOBAL; - global_pt_alloc += kgsl_memdesc_footprint(memdesc); global_pt_entries[global_pt_count].memdesc = memdesc; strlcpy(global_pt_entries[global_pt_count].name, name, |