aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-04 15:49:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-04 15:49:32 -0700
commitf377ea88b862bf7151be96d276f4cb740f8e1c41 (patch)
tree6205913431c012e285316281b6221a20d4a92128 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent51e771c0d25b43d0f12b2c7c01939942becbbe28 (diff)
parent73bf1b7be7aab60d7c651402441dd0b0b4991098 (diff)
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This is the main pull request for the drm for 4.3. Nouveau is probably the biggest amount of changes in here, since it missed 4.2. Highlights below, along with the usual bunch of fixes. All stuff outside drm should have applicable acks. Highlights: - new drivers: freescale dcu kms driver - core: more atomic fixes disable some dri1 interfaces on kms drivers drop fb panic handling, this was just getting more broken, as more locking was required. new core fbdev Kconfig support - instead of each driver enable/disabling it struct_mutex cleanups - panel: more new panels cleanup Kconfig - i915: Skylake support enabled by default legacy modesetting using atomic infrastructure Skylake fixes GEN9 workarounds - amdgpu: Fiji support CGS support for amdgpu Initial GPU scheduler - off by default Lots of bug fixes and optimisations. - radeon: DP fixes misc fixes - amdkfd: Add Carrizo support for amdkfd using amdgpu. - nouveau: long pending cleanup to complete driver, fully bisectable which makes it larger, perfmon work more reclocking improvements maxwell displayport fixes - vmwgfx: new DX device support, supports OpenGL 3.3 screen targets support - mgag200: G200eW support G200e new revision support - msm: dragonboard 410c support, msm8x94 support, msm8x74v1 support yuv format support dma plane support mdp5 rotation initial hdcp - sti: atomic support - exynos: lots of cleanups atomic modesetting/pageflipping support render node support - tegra: tegra210 support (dc, dsi, dp/hdmi) dpms with atomic modesetting support - atmel: support for 3 more atmel SoCs new input formats, PRIME support. - dwhdmi: preparing to add audio support - rockchip: yuv plane support" * 'drm-next' of git://people.freedesktop.org/~airlied/linux: (1369 commits) drm/amdgpu: rename gmc_v8_0_init_compute_vmid drm/amdgpu: fix vce3 instance handling drm/amdgpu: remove ib test for the second VCE Ring drm/amdgpu: properly enable VM fault interrupts drm/amdgpu: fix warning in scheduler drm/amdgpu: fix buffer placement under memory pressure drm/amdgpu/cz: fix cz_dpm_update_low_memory_pstate logic drm/amdgpu: fix typo in dce11 watermark setup drm/amdgpu: fix typo in dce10 watermark setup drm/amdgpu: use top down allocation for non-CPU accessible vram drm/amdgpu: be explicit about cpu vram access for driver BOs (v2) drm/amdgpu: set MEC doorbell range for Fiji drm/amdgpu: implement burst NOP for SDMA drm/amdgpu: add insert_nop ring func and default implementation drm/amdgpu: add amdgpu_get_sdma_instance helper function drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES drm/amdgpu: add burst_nop flag for sdma drm/amdgpu: add count field for the SDMA NOP packet v2 drm/amdgpu: use PT for VM sync on unmap drm/amdgpu: make wait_event uninterruptible in push_job ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c283
1 files changed, 194 insertions, 89 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1f040d85ac47..3b355aeb62fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -126,6 +126,30 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0;
}
+struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct amdgpu_ctx *ctx,
+ struct amdgpu_ib *ibs,
+ uint32_t num_ibs)
+{
+ struct amdgpu_cs_parser *parser;
+ int i;
+
+ parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
+ if (!parser)
+ return NULL;
+
+ parser->adev = adev;
+ parser->filp = filp;
+ parser->ctx = ctx;
+ parser->ibs = ibs;
+ parser->num_ibs = num_ibs;
+ for (i = 0; i < num_ibs; i++)
+ ibs[i].ctx = ctx;
+
+ return parser;
+}
+
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
union drm_amdgpu_cs *cs = data;
@@ -147,13 +171,13 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
/* get chunks */
INIT_LIST_HEAD(&p->validated);
- chunk_array = kcalloc(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+ chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (chunk_array == NULL) {
r = -ENOMEM;
goto out;
}
- chunk_array_user = (uint64_t *)(unsigned long)(cs->in.chunks);
+ chunk_array_user = (uint64_t __user *)(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) {
r = -EFAULT;
@@ -161,7 +185,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
p->nchunks = cs->in.num_chunks;
- p->chunks = kcalloc(p->nchunks, sizeof(struct amdgpu_cs_chunk),
+ p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
GFP_KERNEL);
if (p->chunks == NULL) {
r = -ENOMEM;
@@ -173,7 +197,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
struct drm_amdgpu_cs_chunk user_chunk;
uint32_t __user *cdata;
- chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
+ chunk_ptr = (void __user *)chunk_array[i];
if (copy_from_user(&user_chunk, chunk_ptr,
sizeof(struct drm_amdgpu_cs_chunk))) {
r = -EFAULT;
@@ -183,7 +207,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
- cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
+ cdata = (void __user *)user_chunk.chunk_data;
p->chunks[i].user_ptr = cdata;
p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
@@ -235,11 +259,10 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
}
+
p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!p->ibs) {
+ if (!p->ibs)
r = -ENOMEM;
- goto out;
- }
out:
kfree(chunk_array);
@@ -331,7 +354,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p)
* into account. We don't want to disallow buffer moves
* completely.
*/
- if (current_domain != AMDGPU_GEM_DOMAIN_CPU &&
+ if ((lobj->allowed_domains & current_domain) != 0 &&
(domain & current_domain) == 0 && /* will be moved */
bytes_moved > bytes_moved_threshold) {
/* don't move it */
@@ -415,18 +438,8 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}
-/**
- * cs_parser_fini() - clean parser states
- * @parser: parser structure holding parsing context.
- * @error: error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
- unsigned i;
-
if (!error) {
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
@@ -447,21 +460,45 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
}
+}
+static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
+{
+ unsigned i;
if (parser->ctx)
amdgpu_ctx_put(parser->ctx);
if (parser->bo_list)
amdgpu_bo_list_put(parser->bo_list);
+
drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
- if (parser->ibs)
- for (i = 0; i < parser->num_ibs; i++)
- amdgpu_ib_free(parser->adev, &parser->ibs[i]);
- kfree(parser->ibs);
- if (parser->uf.bo)
- drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+ if (!amdgpu_enable_scheduler)
+ {
+ if (parser->ibs)
+ for (i = 0; i < parser->num_ibs; i++)
+ amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+ kfree(parser->ibs);
+ if (parser->uf.bo)
+ drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
+ }
+
+ kfree(parser);
+}
+
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser: parser structure holding parsing context.
+ * @error: error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
+{
+ amdgpu_cs_parser_fini_early(parser, error, backoff);
+ amdgpu_cs_parser_fini_late(parser);
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -476,12 +513,18 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
if (r)
return r;
+ r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
+ if (r)
+ return r;
+
r = amdgpu_vm_clear_freed(adev, vm);
if (r)
return r;
if (p->bo_list) {
for (i = 0; i < p->bo_list->num_entries; i++) {
+ struct fence *f;
+
/* ignore duplicates */
bo = p->bo_list->array[i].robj;
if (!bo)
@@ -495,7 +538,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
if (r)
return r;
- amdgpu_sync_fence(&p->ibs[0].sync, bo_va->last_pt_update);
+ f = bo_va->last_pt_update;
+ r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
+ if (r)
+ return r;
}
}
@@ -529,9 +575,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
goto out;
}
amdgpu_cs_sync_rings(parser);
-
- r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
- parser->filp);
+ if (!amdgpu_enable_scheduler)
+ r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
+ parser->filp);
out:
mutex_unlock(&vm->mutex);
@@ -650,7 +696,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
ib->oa_size = amdgpu_bo_size(oa);
}
}
-
/* wrap the last IB with user fence */
if (parser->uf.bo) {
struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];
@@ -693,9 +738,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (j = 0; j < num_deps; ++j) {
- struct amdgpu_fence *fence;
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
+ struct fence *fence;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance,
@@ -707,85 +752,137 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
if (ctx == NULL)
return -EINVAL;
- r = amdgpu_fence_recreate(ring, p->filp,
- deps[j].handle,
- &fence);
- if (r) {
+ fence = amdgpu_ctx_get_fence(ctx, ring,
+ deps[j].handle);
+ if (IS_ERR(fence)) {
+ r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
return r;
- }
- amdgpu_sync_fence(&ib->sync, fence);
- amdgpu_fence_unref(&fence);
- amdgpu_ctx_put(ctx);
+ } else if (fence) {
+ r = amdgpu_sync_fence(adev, &ib->sync, fence);
+ fence_put(fence);
+ amdgpu_ctx_put(ctx);
+ if (r)
+ return r;
+ }
}
}
return 0;
}
+static int amdgpu_cs_free_job(struct amdgpu_job *sched_job)
+{
+ int i;
+ if (sched_job->ibs)
+ for (i = 0; i < sched_job->num_ibs; i++)
+ amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
+ kfree(sched_job->ibs);
+ if (sched_job->uf.bo)
+ drm_gem_object_unreference_unlocked(&sched_job->uf.bo->gem_base);
+ return 0;
+}
+
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data;
- struct amdgpu_cs_parser parser;
- int r, i;
+ struct amdgpu_cs_parser *parser;
bool reserved_buffers = false;
+ int i, r;
down_read(&adev->exclusive_lock);
if (!adev->accel_working) {
up_read(&adev->exclusive_lock);
return -EBUSY;
}
- /* initialize parser */
- memset(&parser, 0, sizeof(struct amdgpu_cs_parser));
- parser.filp = filp;
- parser.adev = adev;
- r = amdgpu_cs_parser_init(&parser, data);
+
+ parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
+ if (!parser)
+ return -ENOMEM;
+ r = amdgpu_cs_parser_init(parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
- amdgpu_cs_parser_fini(&parser, r, false);
+ amdgpu_cs_parser_fini(parser, r, false);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
- r = amdgpu_cs_parser_relocs(&parser);
- if (r) {
- if (r != -ERESTARTSYS) {
- if (r == -ENOMEM)
- DRM_ERROR("Not enough memory for command submission!\n");
- else
- DRM_ERROR("Failed to process the buffer list %d!\n", r);
- }
+ r = amdgpu_cs_parser_relocs(parser);
+ if (r == -ENOMEM)
+ DRM_ERROR("Not enough memory for command submission!\n");
+ else if (r && r != -ERESTARTSYS)
+ DRM_ERROR("Failed to process the buffer list %d!\n", r);
+ else if (!r) {
+ reserved_buffers = true;
+ r = amdgpu_cs_ib_fill(adev, parser);
}
if (!r) {
- reserved_buffers = true;
- r = amdgpu_cs_ib_fill(adev, &parser);
+ r = amdgpu_cs_dependencies(adev, parser);
+ if (r)
+ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
}
- if (!r)
- r = amdgpu_cs_dependencies(adev, &parser);
-
- if (r) {
- amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
- up_read(&adev->exclusive_lock);
- r = amdgpu_cs_handle_lockup(adev, r);
- return r;
- }
+ if (r)
+ goto out;
- for (i = 0; i < parser.num_ibs; i++)
- trace_amdgpu_cs(&parser, i);
+ for (i = 0; i < parser->num_ibs; i++)
+ trace_amdgpu_cs(parser, i);
- r = amdgpu_cs_ib_vm_chunk(adev, &parser);
- if (r) {
+ r = amdgpu_cs_ib_vm_chunk(adev, parser);
+ if (r)
goto out;
+
+ if (amdgpu_enable_scheduler && parser->num_ibs) {
+ struct amdgpu_job *job;
+ struct amdgpu_ring * ring = parser->ibs->ring;
+ job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+ job->base.sched = ring->scheduler;
+ job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
+ job->adev = parser->adev;
+ job->ibs = parser->ibs;
+ job->num_ibs = parser->num_ibs;
+ job->base.owner = parser->filp;
+ mutex_init(&job->job_lock);
+ if (job->ibs[job->num_ibs - 1].user) {
+ memcpy(&job->uf, &parser->uf,
+ sizeof(struct amdgpu_user_fence));
+ job->ibs[job->num_ibs - 1].user = &job->uf;
+ }
+
+ job->free_job = amdgpu_cs_free_job;
+ mutex_lock(&job->job_lock);
+ r = amd_sched_entity_push_job((struct amd_sched_job *)job);
+ if (r) {
+ mutex_unlock(&job->job_lock);
+ amdgpu_cs_free_job(job);
+ kfree(job);
+ goto out;
+ }
+ cs->out.handle =
+ amdgpu_ctx_add_fence(parser->ctx, ring,
+ &job->base.s_fence->base);
+ parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+
+ list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+ ttm_eu_fence_buffer_objects(&parser->ticket,
+ &parser->validated,
+ &job->base.s_fence->base);
+
+ mutex_unlock(&job->job_lock);
+ amdgpu_cs_parser_fini_late(parser);
+ up_read(&adev->exclusive_lock);
+ return 0;
}
- cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
+ cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out:
- amdgpu_cs_parser_fini(&parser, r, true);
+ amdgpu_cs_parser_fini(parser, r, reserved_buffers);
up_read(&adev->exclusive_lock);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
@@ -806,30 +903,29 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
- struct amdgpu_fence *fence = NULL;
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx;
+ struct fence *fence;
long r;
- ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
- if (ctx == NULL)
- return -EINVAL;
-
r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
wait->in.ring, &ring);
- if (r) {
- amdgpu_ctx_put(ctx);
+ if (r)
return r;
- }
- r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
+ ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+ if (IS_ERR(fence))
+ r = PTR_ERR(fence);
+ else if (fence) {
+ r = fence_wait_timeout(fence, true, timeout);
+ fence_put(fence);
+ } else
+ r = 1;
- r = fence_wait_timeout(&fence->base, true, timeout);
- amdgpu_fence_unref(&fence);
amdgpu_ctx_put(ctx);
if (r < 0)
return r;
@@ -864,7 +960,16 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (!reloc->bo_va)
continue;
- list_for_each_entry(mapping, &reloc->bo_va->mappings, list) {
+ list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
+ if (mapping->it.start > addr ||
+ addr > mapping->it.last)
+ continue;
+
+ *bo = reloc->bo_va->bo;
+ return mapping;
+ }
+
+ list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
if (mapping->it.start > addr ||
addr > mapping->it.last)
continue;