author    Linux Build Service Account <lnxbuild@localhost>  2019-09-23 03:58:31 -0700
committer Gerrit - the friendly Code Review server <code-review@localhost>  2019-09-23 03:58:32 -0700
commit    2c69f1e502e79bbdc41ef08d374af08e48abca56 (patch)
tree      1ff4d7c98157d16b31da8b72d46b5bbc90883e96
parent    b9d76aa6e9dd8d5032336e5cb4210c81f8a262cc (diff)
parent    a8fba7d0461b7b4519528b2e2ac2e01f1b5bc6a5 (diff)
Merge changes Ic0dedbad,Ic0dedbad,Ic0dedbad,Ic0dedbad into kernel.lnx.4.14.r3-rel (LA.UM.7.9.r1-08500-sm6150.0)
* changes:
  msm: kgsl: Make the "scratch" global buffer use a random GPU address
  msm: kgsl: Use a bitmap allocator for global addressing
  msm: kgsl: Execute user profiling commands in an IB
  msm: kgsl: Verify the offset of the profiling buffer
-rw-r--r--  drivers/gpu/msm/adreno_ringbuffer.c |  58
-rw-r--r--  drivers/gpu/msm/adreno_ringbuffer.h |  14
-rw-r--r--  drivers/gpu/msm/kgsl.h              |   4
-rw-r--r--  drivers/gpu/msm/kgsl_drawobj.c      |  24
-rw-r--r--  drivers/gpu/msm/kgsl_iommu.c        |  60
5 files changed, 126 insertions, 34 deletions
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index d791ee485736..59635ce35592 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -308,6 +308,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc");
if (ret)
return ret;
+
+ /* allocate a chunk of memory to create user profiling IB1s */
+ kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc,
+ PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc");
+
return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc,
KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY,
0, "ringbuffer");
@@ -322,7 +327,7 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt)
if (!adreno_is_a3xx(adreno_dev)) {
status = kgsl_allocate_global(device, &device->scratch,
- PAGE_SIZE, 0, 0, "scratch");
+ PAGE_SIZE, 0, KGSL_MEMDESC_RANDOM, "scratch");
if (status != 0)
return status;
}
@@ -353,7 +358,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev,
kgsl_free_global(device, &rb->pagetable_desc);
kgsl_free_global(device, &rb->preemption_desc);
-
+ kgsl_free_global(device, &rb->profile_desc);
kgsl_free_global(device, &rb->buffer_desc);
kgsl_del_event_group(&rb->events);
memset(rb, 0, sizeof(struct adreno_ringbuffer));
@@ -854,6 +859,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev,
return (unsigned int)(p - cmds);
}
+/* This is the maximum possible size for 64 bit targets */
+#define PROFILE_IB_DWORDS 4
+#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
+
+static int set_user_profiling(struct adreno_device *adreno_dev,
+ struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr)
+{
+ int dwords, index = 0;
+ u64 ib_gpuaddr;
+ u32 *ib;
+
+ if (!rb->profile_desc.hostptr)
+ return 0;
+
+ ib = ((u32 *) rb->profile_desc.hostptr) +
+ (rb->profile_index * PROFILE_IB_DWORDS);
+ ib_gpuaddr = rb->profile_desc.gpuaddr +
+ (rb->profile_index * (PROFILE_IB_DWORDS << 2));
+
+ dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr);
+
+ /* Make an indirect buffer for the request */
+ cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
+ index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr);
+ cmds[index++] = dwords;
+
+ rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
+
+ return index;
+}
+
/* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */
int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj,
@@ -954,14 +990,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
!adreno_is_a3xx(adreno_dev) &&
(cmdobj->profiling_buf_entry != NULL)) {
user_profiling = true;
- dwords += 6;
/*
- * REG_TO_MEM packet on A5xx and above needs another ordinal.
- * Add 2 more dwords since we do profiling before and after.
+	 * User side profiling uses two IB1s, one before and one after the
+	 * command object, each needing 4 dwords for the INDIRECT_BUFFER_PFE call
*/
- if (!ADRENO_LEGACY_PM4(adreno_dev))
- dwords += 2;
+ dwords += 8;
/*
* we want to use an adreno_submit_time struct to get the
@@ -1020,11 +1054,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
}
/*
- * Add cmds to read the GPU ticks at the start of command obj and
+ * Add IB1 to read the GPU ticks at the start of command obj and
* write it into the appropriate command obj profiling buffer offset
*/
if (user_profiling) {
- cmds += _get_alwayson_counter(adreno_dev, cmds,
+ cmds += set_user_profiling(adreno_dev, rb, cmds,
cmdobj->profiling_buffer_gpuaddr +
offsetof(struct kgsl_drawobj_profiling_buffer,
gpu_ticks_submitted));
@@ -1072,11 +1106,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
}
/*
- * Add cmds to read the GPU ticks at the end of command obj and
+ * Add IB1 to read the GPU ticks at the end of command obj and
* write it into the appropriate command obj profiling buffer offset
*/
if (user_profiling) {
- cmds += _get_alwayson_counter(adreno_dev, cmds,
+ cmds += set_user_profiling(adreno_dev, rb, cmds,
cmdobj->profiling_buffer_gpuaddr +
offsetof(struct kgsl_drawobj_profiling_buffer,
gpu_ticks_retired));
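
For reference, the slot arithmetic that set_user_profiling() performs on rb->profile_desc can be modelled in isolation. The sketch below is a standalone, compilable C program; the struct and helper names (profile_pool, next_profile_slot) are illustrative and not part of the KGSL API, and only the PROFILE_IB_DWORDS / PROFILE_IB_SLOTS constants mirror the patch.

/*
 * Standalone sketch of the profiling-IB slot arithmetic used by
 * set_user_profiling(). Names are illustrative, not the KGSL API.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_BYTES    4096
#define PROFILE_IB_DWORDS  4	/* dwords reserved per slot */
#define PROFILE_IB_SLOTS   (PAGE_SIZE_BYTES / (PROFILE_IB_DWORDS << 2))

struct profile_pool {
	uint32_t *hostptr;	/* CPU mapping of the shared page */
	uint64_t gpuaddr;	/* GPU address of the same page */
	uint32_t index;		/* next slot to hand out */
};

/* Hand out the CPU and GPU addresses of the next 16-byte slot. */
static void next_profile_slot(struct profile_pool *pool,
		uint32_t **ib, uint64_t *ib_gpuaddr)
{
	*ib = pool->hostptr + (pool->index * PROFILE_IB_DWORDS);
	*ib_gpuaddr = pool->gpuaddr +
		(pool->index * (PROFILE_IB_DWORDS << 2));

	/* Wrap after PROFILE_IB_SLOTS (256 with 4K pages) slots. */
	pool->index = (pool->index + 1) % PROFILE_IB_SLOTS;
}

int main(void)
{
	static uint32_t page[PAGE_SIZE_BYTES / sizeof(uint32_t)];
	struct profile_pool pool = { page, 0x10000000ULL, 0 };
	uint32_t *ib;
	uint64_t ib_gpuaddr;

	next_profile_slot(&pool, &ib, &ib_gpuaddr);	/* slot 0 */
	next_profile_slot(&pool, &ib, &ib_gpuaddr);	/* slot 1 */

	/* Slot 1 sits 4 dwords / 16 bytes after the base of the page. */
	printf("slot 1: host offset %ld dwords, gpu offset %llu bytes\n",
		(long)(ib - page),
		(unsigned long long)(ib_gpuaddr - pool.gpuaddr));
	return 0;
}

Each profiled submission consumes two of these slots, one before and one after the command object, and the index simply wraps once the page's 256 slots are exhausted.
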
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index 2f3d0146740c..9eb0c92213f3 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -133,6 +133,18 @@ struct adreno_ringbuffer {
int preempted_midway;
spinlock_t preempt_lock;
bool skip_inline_wptr;
+ /**
+ * @profile_desc: global memory to construct IB1s to do user side
+ * profiling
+ */
+ struct kgsl_memdesc profile_desc;
+ /**
+	 * @profile_index: Index of the next free "slot" in profile_desc for a
+	 * user profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous
+	 * commands per ringbuffer with user profiling enabled, which should be
+	 * more than enough.
+ */
+ u32 profile_index;
};
/* Returns the current ringbuffer */
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index dfb5050c734b..e5e071e03314 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -201,6 +201,8 @@ struct kgsl_memdesc_ops {
#define KGSL_MEMDESC_CONTIG BIT(8)
/* This is an instruction buffer */
#define KGSL_MEMDESC_UCODE BIT(9)
+/* For global buffers, randomly assign an address from the region */
+#define KGSL_MEMDESC_RANDOM BIT(10)
/**
* struct kgsl_memdesc - GPU memory object descriptor
diff --git a/drivers/gpu/msm/kgsl_drawobj.c b/drivers/gpu/msm/kgsl_drawobj.c
index ee75a204635c..b82ea1f15503 100644
--- a/drivers/gpu/msm/kgsl_drawobj.c
+++ b/drivers/gpu/msm/kgsl_drawobj.c
@@ -598,13 +598,29 @@ static void add_profiling_buffer(struct kgsl_device *device,
return;
}
- cmdobj->profiling_buf_entry = entry;
- if (id != 0)
+ if (!id) {
+ cmdobj->profiling_buffer_gpuaddr = gpuaddr;
+ } else {
+ u64 off = offset + sizeof(struct kgsl_drawobj_profiling_buffer);
+
+ /*
+ * Make sure there is enough room in the object to store the
+ * entire profiling buffer object
+ */
+ if (off < offset || off >= entry->memdesc.size) {
+ dev_err(device->dev,
+ "ignore invalid profile offset ctxt %d id %d offset %lld gpuaddr %llx size %lld\n",
+ drawobj->context->id, id, offset, gpuaddr, size);
+ kgsl_mem_entry_put(entry);
+ return;
+ }
+
cmdobj->profiling_buffer_gpuaddr =
entry->memdesc.gpuaddr + offset;
- else
- cmdobj->profiling_buffer_gpuaddr = gpuaddr;
+ }
+
+ cmdobj->profiling_buf_entry = entry;
}
/**
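
The added bounds check reads more clearly in isolation: the end of the profiling record must be larger than its start (ruling out 64-bit wraparound) and must still lie inside the backing allocation. A minimal sketch, with an illustrative struct and function name standing in for the KGSL types:

/*
 * Minimal sketch of the offset validation added to add_profiling_buffer().
 * The struct layout and function name are assumptions for illustration;
 * only the check itself mirrors the patch.
 */
#include <stdbool.h>
#include <stdint.h>

struct profiling_record {
	uint64_t wall_clock_s;
	uint64_t wall_clock_ns;
	uint64_t gpu_ticks_queued;
	uint64_t gpu_ticks_submitted;
	uint64_t gpu_ticks_retired;
};

static bool profiling_offset_valid(uint64_t offset, uint64_t entry_size)
{
	uint64_t end = offset + sizeof(struct profiling_record);

	/*
	 * end < offset catches unsigned wraparound of the addition;
	 * end >= entry_size catches a record that runs past the object.
	 */
	if (end < offset || end >= entry_size)
		return false;

	return true;
}

With an offset near U64_MAX the addition wraps and the first test trips; with a merely too-large offset the second test trips, so the later profiling writes can never land outside the user's memory entry.
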
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index a05abe0776db..1ad2badcf065 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -20,6 +20,7 @@
#include <linux/msm_kgsl.h>
#include <linux/ratelimit.h>
#include <linux/of_platform.h>
+#include <linux/random.h>
#include <soc/qcom/scm.h>
#include <soc/qcom/secure_buffer.h>
#include <linux/compat.h>
@@ -90,15 +91,8 @@ static struct kmem_cache *addr_entry_cache;
*
* Here we define an array and a simple allocator to keep track of the currently
* active global entries. Each entry is assigned a unique address inside of a
- * MMU implementation specific "global" region. The addresses are assigned
- * sequentially and never re-used to avoid having to go back and reprogram
- * existing pagetables. The entire list of active entries are mapped and
- * unmapped into every new pagetable as it is created and destroyed.
- *
- * Because there are relatively few entries and they are defined at boot time we
- * don't need to go over the top to define a dynamic allocation scheme. It will
- * be less wasteful to pick a static number with a little bit of growth
- * potential.
+ * MMU implementation specific "global" region. We use a simple bitmap based
+ * allocator for the region to allow for both fixed and dynamic addressing.
*/
#define GLOBAL_PT_ENTRIES 32
@@ -108,13 +102,17 @@ struct global_pt_entry {
char name[32];
};
+#define GLOBAL_MAP_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT)
+
static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES];
+static DECLARE_BITMAP(global_map, GLOBAL_MAP_PAGES);
+
static int secure_global_size;
static int global_pt_count;
-uint64_t global_pt_alloc;
static struct kgsl_memdesc gpu_qdss_desc;
static struct kgsl_memdesc gpu_qtimer_desc;
static unsigned int context_bank_number;
+
void kgsl_print_global_pt_entries(struct seq_file *s)
{
int i;
@@ -209,6 +207,12 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,
for (i = 0; i < global_pt_count; i++) {
if (global_pt_entries[i].memdesc == memdesc) {
+ u64 offset = memdesc->gpuaddr -
+ KGSL_IOMMU_GLOBAL_MEM_BASE(mmu);
+
+ bitmap_clear(global_map, offset >> PAGE_SHIFT,
+ kgsl_memdesc_footprint(memdesc) >> PAGE_SHIFT);
+
memdesc->gpuaddr = 0;
memdesc->priv &= ~KGSL_MEMDESC_GLOBAL;
global_pt_entries[i].memdesc = NULL;
@@ -220,19 +224,43 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,
static void kgsl_iommu_add_global(struct kgsl_mmu *mmu,
struct kgsl_memdesc *memdesc, const char *name)
{
+ u32 bit, start = 0;
+ u64 size = kgsl_memdesc_footprint(memdesc);
+
if (memdesc->gpuaddr != 0)
return;
- /*Check that we can fit the global allocations */
- if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES) ||
- WARN_ON((global_pt_alloc + memdesc->size) >=
- KGSL_IOMMU_GLOBAL_MEM_SIZE))
+ if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES))
+ return;
+
+ if (WARN_ON(size > KGSL_IOMMU_GLOBAL_MEM_SIZE))
+ return;
+
+ if (memdesc->priv & KGSL_MEMDESC_RANDOM) {
+ u32 range = GLOBAL_MAP_PAGES - (size >> PAGE_SHIFT);
+
+ start = get_random_int() % range;
+ }
+
+ while (start >= 0) {
+ bit = bitmap_find_next_zero_area(global_map, GLOBAL_MAP_PAGES,
+ start, size >> PAGE_SHIFT, 0);
+
+ if (bit < GLOBAL_MAP_PAGES)
+ break;
+
+ start--;
+ }
+
+ if (WARN_ON(start < 0))
return;
- memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + global_pt_alloc;
+ memdesc->gpuaddr =
+ KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + (bit << PAGE_SHIFT);
+
+ bitmap_set(global_map, bit, size >> PAGE_SHIFT);
memdesc->priv |= KGSL_MEMDESC_GLOBAL;
- global_pt_alloc += kgsl_memdesc_footprint(memdesc);
global_pt_entries[global_pt_count].memdesc = memdesc;
strlcpy(global_pt_entries[global_pt_count].name, name,
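
For context, the allocation strategy in kgsl_iommu_add_global() can be exercised outside the kernel with a hand-rolled bitmap. The sketch below is a userspace model under stated assumptions: the helper names, the region size, and rand() stand in for the kernel's bitmap_find_next_zero_area(), KGSL_IOMMU_GLOBAL_MEM_SIZE, and get_random_int(), and it uses a signed start so the backward walk clearly terminates.

/*
 * Userspace model of the global-region bitmap allocator: pick a random
 * starting page for KGSL_MEMDESC_RANDOM-style requests, then retry with
 * a lower start until a free run of pages is found.
 */
#include <stdint.h>
#include <stdlib.h>

#define GLOBAL_MAP_PAGES 1024		/* e.g. a 4MB region of 4K pages */

static uint8_t global_map[GLOBAL_MAP_PAGES];	/* 0 = free, 1 = in use */

/* Forward-search for a free run of npages at index >= from. */
static uint32_t find_zero_area(uint32_t from, uint32_t npages)
{
	for (uint32_t s = from; s + npages <= GLOBAL_MAP_PAGES; s++) {
		uint32_t i;

		for (i = 0; i < npages && !global_map[s + i]; i++)
			;
		if (i == npages)
			return s;
	}
	return GLOBAL_MAP_PAGES;	/* nothing free at or after 'from' */
}

/* Allocate npages, preferring a random start; returns page index or -1. */
static int alloc_global_pages(uint32_t npages, int randomize)
{
	int start = 0;

	if (npages == 0 || npages >= GLOBAL_MAP_PAGES)
		return -1;

	if (randomize)
		start = rand() % (GLOBAL_MAP_PAGES - npages);

	/* Mirror the driver loop: retry with a lower start until a run fits. */
	for (; start >= 0; start--) {
		uint32_t bit = find_zero_area((uint32_t)start, npages);

		if (bit < GLOBAL_MAP_PAGES) {
			for (uint32_t i = 0; i < npages; i++)
				global_map[bit + i] = 1;
			return (int)bit;
		}
	}

	return -1;	/* no free run at or below the preferred start */
}

/* Freeing mirrors kgsl_iommu_remove_global(): clear the run in the bitmap. */
static void free_global_pages(uint32_t bit, uint32_t npages)
{
	for (uint32_t i = 0; i < npages; i++)
		global_map[bit + i] = 0;
}

The key property of the new scheme is visible here: fixed allocations still pack from the bottom of the region, KGSL_MEMDESC_RANDOM buffers such as "scratch" land at an unpredictable page, and freeing a buffer returns its pages for reuse, which the old sequential global_pt_alloc counter could not do.
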