Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 904
1 file changed, 515 insertions(+), 389 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5e98fd79bd9d..4e0a351bfbca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,20 +164,15 @@
#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
-static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct intel_context *ce);
+#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
+
+static int execlists_context_deferred_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
- struct i915_gem_context *ctx,
+ struct intel_context *ce,
struct intel_engine_cs *engine,
struct intel_ring *ring);
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
- return (i915_ggtt_offset(engine->status_page.vma) +
- I915_GEM_HWS_INDEX_ADDR);
-}
-
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
@@ -188,6 +183,34 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority;
}
+static int effective_prio(const struct i915_request *rq)
+{
+ int prio = rq_prio(rq);
+
+ /*
+ * On unwinding the active request, we give it a priority bump
+ * equivalent to a freshly submitted request. This protects it from
+ * being gazumped again, but it would be preferable if we didn't
+ * let it be gazumped in the first place!
+ *
+ * See __unwind_incomplete_requests()
+ */
+ if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(rq)) {
+ /*
+ * After preemption, we insert the active request at the
+ * end of the new priority level. This means that we will be
+ * _lower_ priority than the preemptee all things equal (and
+ * so the preemption is valid), so adjust our comparison
+ * accordingly.
+ */
+ prio |= ACTIVE_PRIORITY;
+ prio--;
+ }
+
+ /* Restrict mere WAIT boosts from triggering preemption */
+ return prio | __NO_PREEMPTION;
+}
+
static int queue_prio(const struct intel_engine_execlists *execlists)
{
struct i915_priolist *p;
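To make the bit arithmetic in effective_prio() above easier to follow, here is a minimal userspace sketch. The priority bits are invented for illustration (the driver's real I915_PRIORITY_* layout differs) and the WAIT/__NO_PREEMPTION handling is left out:

#include <stdio.h>

#define PRIO_NEWCLIENT   (1 << 1)	/* assumed bit, illustration only */
#define PRIO_NOSEMAPHORE (1 << 0)	/* assumed bit, illustration only */
#define PRIO_ACTIVE      (PRIO_NEWCLIENT | PRIO_NOSEMAPHORE)

static int sketch_effective_prio(int prio, int has_started)
{
	/* Only a request that has actually begun execution is boosted. */
	if ((~prio & PRIO_ACTIVE) && has_started) {
		prio |= PRIO_ACTIVE;
		prio--;		/* rank just below a fresh submission */
	}
	return prio;
}

int main(void)
{
	int base = 8 << 2;	/* arbitrary user priority, clear of the flag bits */

	printf("fresh submission:  %d\n", base | PRIO_ACTIVE);
	printf("unwound (started): %d\n", sketch_effective_prio(base, 1));
	printf("unwound (waiting): %d\n", sketch_effective_prio(base, 0));
	return 0;
}

An unwound-but-started request therefore compares strictly below an otherwise identical fresh submission, which is what keeps the original preemption decision valid.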
@@ -208,9 +231,9 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
static inline bool need_preempt(const struct intel_engine_cs *engine,
const struct i915_request *rq)
{
- const int last_prio = rq_prio(rq);
+ int last_prio;
- if (!intel_engine_has_preemption(engine))
+ if (!engine->preempt_context)
return false;
if (i915_request_completed(rq))
@@ -228,6 +251,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
* preempt. If that hint is stale or we may be trying to preempt
* ourselves, ignore the request.
*/
+ last_prio = effective_prio(rq);
if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
last_prio))
return false;
@@ -254,12 +278,11 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
}
__maybe_unused static inline bool
-assert_priority_queue(const struct intel_engine_execlists *execlists,
- const struct i915_request *prev,
+assert_priority_queue(const struct i915_request *prev,
const struct i915_request *next)
{
- if (!prev)
- return true;
+ const struct intel_engine_execlists *execlists =
+ &prev->engine->execlists;
/*
* Without preemption, the prev may refer to the still active element
@@ -300,11 +323,10 @@ assert_priority_queue(const struct intel_engine_execlists *execlists,
* engine info, SW context ID and SW counter need to form a unique number
* (Context ID) per lrc.
*/
-static void
-intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct intel_context *ce)
+static u64
+lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
+ struct i915_gem_context *ctx = ce->gem_context;
u64 desc;
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -322,7 +344,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
* Consider updating oa_get_render_ctx_id in i915_perf.c when changing
* anything below.
*/
- if (INTEL_GEN(ctx->i915) >= 11) {
+ if (INTEL_GEN(engine->i915) >= 11) {
GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
/* bits 37-47 */
@@ -339,7 +361,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
}
- ce->lrc_desc = desc;
+ return desc;
}
static void unwind_wa_tail(struct i915_request *rq)
@@ -353,7 +375,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
{
struct i915_request *rq, *rn, *active = NULL;
struct list_head *uninitialized_var(pl);
- int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
+ int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
lockdep_assert_held(&engine->timeline.lock);
@@ -384,9 +406,21 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
* The active request is now effectively the start of a new client
* stream, so give it the equivalent small priority bump to prevent
* it being gazumped a second time by another peer.
+ *
+ * Note we have to be careful not to apply a priority boost to a request
+ * still spinning on its semaphores. If the request hasn't started, that
+ * means it is still waiting for its dependencies to be signaled, and
+ * if we apply a priority boost to this request, we will boost it past
+ * its signalers and so break PI.
+ *
+ * One consequence of this preemption boost is that we may jump
+ * over lesser priorities (such as I915_PRIORITY_WAIT), effectively
+ * making those priorities non-preemptible. They will be moved forward
+ * in the priority queue, but they will not gain immediate access to
+ * the GPU.
*/
- if (!(prio & I915_PRIORITY_NEWCLIENT)) {
- prio |= I915_PRIORITY_NEWCLIENT;
+ if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
+ prio |= ACTIVE_PRIORITY;
active->sched.attr.priority = prio;
list_move_tail(&active->sched.link,
i915_sched_lookup_priolist(engine, prio));
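The __i915_request_has_started() guard above is what protects priority inheritance. A toy model of the hazard it avoids, with invented names, priorities and boost value:

#include <stdio.h>

struct toy_req {
	const char *name;
	int prio;
	int started;
};

static int unwind_prio(const struct toy_req *rq, int boost)
{
	/* Mirror the guard above: only a request that has started is boosted. */
	return rq->started ? rq->prio + boost : rq->prio;
}

int main(void)
{
	struct toy_req signaler = { "A (signaler)", 2, 1 };
	struct toy_req waiter   = { "B (waits on A)", 2, 0 };

	printf("with guard:    %s=%d, %s=%d\n",
	       signaler.name, signaler.prio,
	       waiter.name, unwind_prio(&waiter, 3));
	printf("without guard: %s=%d, %s=%d  <- waiter outranks its signaler\n",
	       signaler.name, signaler.prio,
	       waiter.name, waiter.prio + 3);
	return 0;
}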
@@ -395,13 +429,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
return active;
}
-void
+struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
{
struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
- __unwind_incomplete_requests(engine);
+ return __unwind_incomplete_requests(engine);
}
static inline void
@@ -523,13 +557,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
engine->name, n,
port[n].context_id, count,
- rq->global_seqno,
rq->fence.context, rq->fence.seqno,
hwsp_seqno(rq),
- intel_engine_get_seqno(engine),
rq_prio(rq));
} else {
GEM_BUG_ON(!n);
@@ -564,6 +596,17 @@ static bool can_merge_ctx(const struct intel_context *prev,
return true;
}
+static bool can_merge_rq(const struct i915_request *prev,
+ const struct i915_request *next)
+{
+ GEM_BUG_ON(!assert_priority_queue(prev, next));
+
+ if (!can_merge_ctx(prev->hw_context, next->hw_context))
+ return false;
+
+ return true;
+}
+
static void port_assign(struct execlist_port *port, struct i915_request *rq)
{
GEM_BUG_ON(rq == port_request(port));
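As a rough model of what can_merge_rq() will gate in the dequeue loop further down: consecutive requests may share an ELSP port only while they use the same hardware context, and only two ports exist. The structure and values below are invented for illustration:

#include <stdio.h>

#define NUM_ELSP_PORTS 2

struct toy_request {
	int ctx_id;	/* stand-in for rq->hw_context */
};

static int ports_needed(const struct toy_request *rq, int count)
{
	int ports = 0, last_ctx = -1;
	int i;

	for (i = 0; i < count; i++) {
		if (rq[i].ctx_id != last_ctx) {	/* cannot merge: takes a new port */
			ports++;
			last_ctx = rq[i].ctx_id;
		}
	}
	return ports;
}

int main(void)
{
	struct toy_request queue[] = { {1}, {1}, {2}, {2}, {2} };

	printf("ports needed: %d (only %d ELSP ports exist)\n",
	       ports_needed(queue, 5), NUM_ELSP_PORTS);
	return 0;
}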
@@ -577,8 +620,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
static void inject_preempt_context(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce =
- to_intel_context(engine->i915->preempt_context, engine);
+ struct intel_context *ce = engine->preempt_context;
unsigned int n;
GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -716,8 +758,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));
-
/*
* Can we combine this request with the current port?
* It has to be the same context/ringbuffer and not
@@ -729,8 +769,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* second request, and so we never need to tell the
* hardware about the first.
*/
- if (last &&
- !can_merge_ctx(rq->hw_context, last->hw_context)) {
+ if (last && !can_merge_rq(last, rq)) {
/*
* If we are on the second port and cannot
* combine this request with the last, then we
@@ -740,6 +779,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
goto done;
/*
+ * We must not populate both ELSP[] with the
+ * same LRCA, i.e. we must submit 2 different
+ * contexts if we submit 2 ELSP.
+ */
+ if (last->hw_context == rq->hw_context)
+ goto done;
+
+ /*
* If GVT overrides us we only ever submit
* port[0], leaving port[1] empty. Note that we
* also have to be careful that we don't queue
@@ -750,7 +797,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context))
goto done;
- GEM_BUG_ON(last->hw_context == rq->hw_context);
if (submit)
port_assign(port, last);
@@ -769,8 +815,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
rb_erase_cached(&p->node, &execlists->queue);
- if (p->priority != I915_PRIORITY_NORMAL)
- kmem_cache_free(engine->i915->priorities, p);
+ i915_priolist_free(p);
}
done:
@@ -790,8 +835,7 @@ done:
* request triggering preemption on the next dequeue (or subsequent
* interrupt for secondary ports).
*/
- execlists->queue_priority_hint =
- port != execlists->port ? rq_prio(last) : INT_MIN;
+ execlists->queue_priority_hint = queue_prio(execlists);
if (submit) {
port_assign(port, last);
@@ -821,13 +865,11 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
while (num_ports-- && port_isset(port)) {
struct i915_request *rq = port_request(port);
- GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
+ GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
rq->engine->name,
(unsigned int)(port - execlists->port),
- rq->global_seqno,
rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq),
- intel_engine_get_seqno(rq->engine));
+ hwsp_seqno(rq));
GEM_BUG_ON(!execlists->active);
execlists_context_schedule_out(rq,
@@ -851,104 +893,6 @@ invalidate_csb_entries(const u32 *first, const u32 *last)
clflush((void *)last);
}
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
-{
- const unsigned int reset_value = GEN8_CSB_ENTRIES - 1;
-
- /*
- * After a reset, the HW starts writing into CSB entry [0]. We
- * therefore have to set our HEAD pointer back one entry so that
- * the *first* entry we check is entry 0. To complicate this further,
- * as we don't wait for the first interrupt after reset, we have to
- * fake the HW write to point back to the last entry so that our
- * inline comparison of our cached head position against the last HW
- * write works even before the first interrupt.
- */
- execlists->csb_head = reset_value;
- WRITE_ONCE(*execlists->csb_write, reset_value);
-
- invalidate_csb_entries(&execlists->csb_status[0],
- &execlists->csb_status[GEN8_CSB_ENTRIES - 1]);
-}
-
-static void nop_submission_tasklet(unsigned long data)
-{
- /* The driver is wedged; don't process any more events. */
-}
-
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists * const execlists = &engine->execlists;
- struct i915_request *rq, *rn;
- struct rb_node *rb;
- unsigned long flags;
-
- GEM_TRACE("%s current %d\n",
- engine->name, intel_engine_get_seqno(engine));
-
- /*
- * Before we call engine->cancel_requests(), we should have exclusive
- * access to the submission state. This is arranged for us by the
- * caller disabling the interrupt generation, the tasklet and other
- * threads that may then access the same state, giving us a free hand
- * to reset state. However, we still need to let lockdep be aware that
- * we know this state may be accessed in hardirq context, so we
- * disable the irq around this manipulation and we want to keep
- * the spinlock focused on its duties and not accidentally conflate
- * coverage to the submission's irq state. (Similarly, although we
- * shouldn't need to disable irq around the manipulation of the
- * submission's irq state, we also wish to remind ourselves that
- * it is irq state.)
- */
- spin_lock_irqsave(&engine->timeline.lock, flags);
-
- /* Cancel the requests on the HW and clear the ELSP tracker. */
- execlists_cancel_port_requests(execlists);
- execlists_user_end(execlists);
-
- /* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->timeline.requests, link) {
- GEM_BUG_ON(!rq->global_seqno);
-
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
- i915_request_mark_complete(rq);
- }
-
- /* Flush the queued requests to the timeline list (for retiring). */
- while ((rb = rb_first_cached(&execlists->queue))) {
- struct i915_priolist *p = to_priolist(rb);
- int i;
-
- priolist_for_each_request_consume(rq, rn, p, i) {
- list_del_init(&rq->sched.link);
- __i915_request_submit(rq);
- dma_fence_set_error(&rq->fence, -EIO);
- i915_request_mark_complete(rq);
- }
-
- rb_erase_cached(&p->node, &execlists->queue);
- if (p->priority != I915_PRIORITY_NORMAL)
- kmem_cache_free(engine->i915->priorities, p);
- }
-
- intel_write_status_page(engine,
- I915_GEM_HWS_INDEX,
- intel_engine_last_submit(engine));
-
- /* Remaining _unready_ requests will be nop'ed when submitted */
-
- execlists->queue_priority_hint = INT_MIN;
- execlists->queue = RB_ROOT_CACHED;
- GEM_BUG_ON(port_isset(execlists->port));
-
- GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
- execlists->tasklet.func = nop_submission_tasklet;
-
- spin_unlock_irqrestore(&engine->timeline.lock, flags);
-}
-
static inline bool
reset_in_progress(const struct intel_engine_execlists *execlists)
{
@@ -960,6 +904,7 @@ static void process_csb(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
const u32 * const buf = execlists->csb_status;
+ const u8 num_entries = execlists->csb_size;
u8 head, tail;
lockdep_assert_held(&engine->timeline.lock);
@@ -995,7 +940,7 @@ static void process_csb(struct intel_engine_cs *engine)
unsigned int status;
unsigned int count;
- if (++head == GEN8_CSB_ENTRIES)
+ if (++head == num_entries)
head = 0;
/*
@@ -1052,14 +997,12 @@ static void process_csb(struct intel_engine_cs *engine)
EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
engine->name,
port->context_id, count,
- rq ? rq->global_seqno : 0,
rq ? rq->fence.context : 0,
rq ? rq->fence.seqno : 0,
rq ? hwsp_seqno(rq) : 0,
- intel_engine_get_seqno(engine),
rq ? rq_prio(rq) : 0);
/* Check the context/desc id for this event matches */
@@ -1119,7 +1062,7 @@ static void process_csb(struct intel_engine_cs *engine)
* the wash as hardware, working or not, will need to do the
* invalidation before.
*/
- invalidate_csb_entries(&buf[0], &buf[GEN8_CSB_ENTRIES - 1]);
+ invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
}
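The cached-head walk above now wraps at the per-platform csb_size (set up in logical_ring_init() below) rather than a fixed Gen8 entry count. A simplified stand-alone sketch of that walk, with made-up buffer contents:

#include <stdio.h>

static void drain_csb(const unsigned int *csb, unsigned int num_entries,
		      unsigned int *head, unsigned int tail)
{
	while (*head != tail) {
		if (++*head == num_entries)	/* wrap, as in process_csb() */
			*head = 0;
		printf("event[%u] = %#x\n", *head, csb[*head]);
	}
}

int main(void)
{
	unsigned int csb[6] = { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20 };
	unsigned int head = 5;	/* reset value: one entry before slot 0 */

	/* Pretend the hardware wrote entries 0 and 1 since the last pass. */
	drain_csb(csb, 6, &head, 1);
	return 0;
}

Starting the head at num_entries - 1 mirrors reset_csb_pointers(): the first increment lands on entry 0, where the hardware begins writing after a reset.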
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
@@ -1196,19 +1139,50 @@ static void execlists_submit_request(struct i915_request *request)
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
-static void execlists_context_destroy(struct intel_context *ce)
+static void __execlists_context_fini(struct intel_context *ce)
{
- GEM_BUG_ON(ce->pin_count);
-
- if (!ce->state)
- return;
-
- intel_ring_free(ce->ring);
+ intel_ring_put(ce->ring);
GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
i915_gem_object_put(ce->state->obj);
}
+static void execlists_context_destroy(struct kref *kref)
+{
+ struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+
+ GEM_BUG_ON(intel_context_is_pinned(ce));
+
+ if (ce->state)
+ __execlists_context_fini(ce);
+
+ intel_context_free(ce);
+}
+
+static int __context_pin(struct i915_vma *vma)
+{
+ unsigned int flags;
+ int err;
+
+ flags = PIN_GLOBAL | PIN_HIGH;
+ flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ return err;
+
+ vma->obj->pin_global++;
+ vma->obj->mm.dirty = true;
+
+ return 0;
+}
+
+static void __context_unpin(struct i915_vma *vma)
+{
+ vma->obj->pin_global--;
+ __i915_vma_unpin(vma);
+}
+
static void execlists_context_unpin(struct intel_context *ce)
{
struct intel_engine_cs *engine;
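execlists_context_destroy() above now hangs off a struct kref embedded in the intel_context, with container_of() recovering the context from its refcount. A minimal kernel-style sketch of that idiom; the toy_* names are invented, and the real release path also frees the ring and state object as shown above:

#include <linux/kref.h>
#include <linux/slab.h>

struct toy_context {
	struct kref ref;	/* initialised with kref_init() at creation */
	void *state;
};

static void toy_context_destroy(struct kref *kref)
{
	struct toy_context *ce = container_of(kref, struct toy_context, ref);

	kfree(ce->state);
	kfree(ce);
}

static inline void toy_context_put(struct toy_context *ce)
{
	/* Runs toy_context_destroy() when the last reference is dropped. */
	kref_put(&ce->ref, toy_context_destroy);
}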
@@ -1237,41 +1211,19 @@ static void execlists_context_unpin(struct intel_context *ce)
intel_ring_unpin(ce->ring);
- ce->state->obj->pin_global--;
i915_gem_object_unpin_map(ce->state->obj);
- i915_vma_unpin(ce->state);
-
- i915_gem_context_put(ce->gem_context);
-}
-
-static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
-{
- unsigned int flags;
- int err;
-
- /*
- * Clear this page out of any CPU caches for coherent swap-in/out.
- * We only want to do this on the first bind so that we do not stall
- * on an active context (which by nature is already on the GPU).
- */
- if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
- err = i915_gem_object_set_to_wc_domain(vma->obj, true);
- if (err)
- return err;
- }
-
- flags = PIN_GLOBAL | PIN_HIGH;
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
- return i915_vma_pin(vma, 0, 0, flags);
+ __context_unpin(ce->state);
}
static void
-__execlists_update_reg_state(struct intel_engine_cs *engine,
- struct intel_context *ce)
+__execlists_update_reg_state(struct intel_context *ce,
+ struct intel_engine_cs *engine)
{
- u32 *regs = ce->lrc_reg_state;
struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD + 1] = ring->head;
@@ -1279,29 +1231,30 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
/* RPCS */
if (engine->class == RENDER_CLASS)
- regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915,
- &ce->sseu);
+ regs[CTX_R_PWR_CLK_STATE + 1] =
+ gen8_make_rpcs(engine->i915, &ce->sseu);
}
-static struct intel_context *
-__execlists_context_pin(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx,
- struct intel_context *ce)
+static int
+__execlists_context_pin(struct intel_context *ce,
+ struct intel_engine_cs *engine)
{
void *vaddr;
int ret;
- ret = execlists_context_deferred_alloc(ctx, engine, ce);
+ GEM_BUG_ON(!ce->gem_context->ppgtt);
+
+ ret = execlists_context_deferred_alloc(ce, engine);
if (ret)
goto err;
GEM_BUG_ON(!ce->state);
- ret = __context_pin(ctx, ce->state);
+ ret = __context_pin(ce->state);
if (ret)
goto err;
vaddr = i915_gem_object_pin_map(ce->state->obj,
- i915_coherent_map_type(ctx->i915) |
+ i915_coherent_map_type(engine->i915) |
I915_MAP_OVERRIDE);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
@@ -1312,55 +1265,60 @@ __execlists_context_pin(struct intel_engine_cs *engine,
if (ret)
goto unpin_map;
- ret = i915_gem_context_pin_hw_id(ctx);
+ ret = i915_gem_context_pin_hw_id(ce->gem_context);
if (ret)
goto unpin_ring;
- intel_lr_context_descriptor_update(ctx, engine, ce);
-
- GEM_BUG_ON(!intel_ring_offset_valid(ce->ring, ce->ring->head));
-
+ ce->lrc_desc = lrc_descriptor(ce, engine);
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+ __execlists_update_reg_state(ce, engine);
- __execlists_update_reg_state(engine, ce);
-
- ce->state->obj->pin_global++;
- i915_gem_context_get(ctx);
- return ce;
+ return 0;
unpin_ring:
intel_ring_unpin(ce->ring);
unpin_map:
i915_gem_object_unpin_map(ce->state->obj);
unpin_vma:
- __i915_vma_unpin(ce->state);
+ __context_unpin(ce->state);
err:
- ce->pin_count = 0;
- return ERR_PTR(ret);
+ return ret;
}
-static const struct intel_context_ops execlists_context_ops = {
- .unpin = execlists_context_unpin,
- .destroy = execlists_context_destroy,
-};
-
-static struct intel_context *
-execlists_context_pin(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx)
+static int execlists_context_pin(struct intel_context *ce)
{
- struct intel_context *ce = to_intel_context(ctx, engine);
-
- lockdep_assert_held(&ctx->i915->drm.struct_mutex);
- GEM_BUG_ON(!ctx->ppgtt);
+ return __execlists_context_pin(ce, ce->engine);
+}
- if (likely(ce->pin_count++))
- return ce;
- GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+static void execlists_context_reset(struct intel_context *ce)
+{
+ /*
+ * Because we emit WA_TAIL_DWORDS there may be a disparity
+ * between our bookkeeping in ce->ring->head and ce->ring->tail and
+ * that stored in context. As we only write new commands from
+ * ce->ring->tail onwards, everything before that is junk. If the GPU
+ * starts reading from its RING_HEAD from the context, it may try to
+ * execute that junk and die.
+ *
+	 * The contexts that are still pinned on resume belong to the
+ * kernel, and are local to each engine. All other contexts will
+ * have their head/tail sanitized upon pinning before use, so they
+	 * will never see garbage.
+ *
+ * So to avoid that we reset the context images upon resume. For
+ * simplicity, we just zero everything out.
+ */
+ intel_ring_reset(ce->ring, 0);
+ __execlists_update_reg_state(ce, ce->engine);
+}
- ce->ops = &execlists_context_ops;
+static const struct intel_context_ops execlists_context_ops = {
+ .pin = execlists_context_pin,
+ .unpin = execlists_context_unpin,
- return __execlists_context_pin(engine, ctx, ce);
-}
+ .reset = execlists_context_reset,
+ .destroy = execlists_context_destroy,
+};
static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
@@ -1387,6 +1345,10 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = rq->fence.seqno - 1;
intel_ring_advance(rq, cs);
+
+ /* Record the updated position of the request's payload */
+ rq->infix = intel_ring_offset(rq, cs);
+
return 0;
}
@@ -1424,10 +1386,11 @@ static int emit_pdps(struct i915_request *rq)
*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+ u32 base = engine->mmio_base;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1447,7 +1410,7 @@ static int execlists_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!request->hw_context->pin_count);
+ GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -1465,7 +1428,7 @@ static int execlists_request_alloc(struct i915_request *request)
*/
/* Unconditionally invalidate GPU caches and TLBs. */
- if (i915_vm_is_48bit(&request->gem_context->ppgtt->vm))
+ if (i915_vm_is_4lvl(&request->gem_context->ppgtt->vm))
ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
else
ret = emit_pdps(request);
@@ -1732,7 +1695,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
unsigned int i;
int ret;
- if (GEM_DEBUG_WARN_ON(engine->id != RCS))
+ if (GEM_DEBUG_WARN_ON(engine->id != RCS0))
return -EINVAL;
switch (INTEL_GEN(engine->i915)) {
@@ -1796,17 +1759,9 @@ static void enable_execlists(struct intel_engine_cs *engine)
intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
- /*
- * Make sure we're not enabling the new 12-deep CSB
- * FIFO as that requires a slightly updated handling
- * in the ctx switch irq. Since we're currently only
- * using only 2 elements of the enhanced execlists the
- * deeper FIFO it's not needed and it's not worth adding
- * more statements to the irq handler to support it.
- */
if (INTEL_GEN(dev_priv) >= 11)
I915_WRITE(RING_MODE_GEN7(engine),
- _MASKED_BIT_DISABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+ _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
else
I915_WRITE(RING_MODE_GEN7(engine),
_MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
@@ -1872,20 +1827,72 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
__tasklet_disable_sync_once(&execlists->tasklet);
GEM_BUG_ON(!reset_in_progress(execlists));
+ intel_engine_stop_cs(engine);
+
/* And flush any current direct submission. */
spin_lock_irqsave(&engine->timeline.lock, flags);
- process_csb(engine); /* drain preemption events */
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+static bool lrc_regs_ok(const struct i915_request *rq)
+{
+ const struct intel_ring *ring = rq->ring;
+ const u32 *regs = rq->hw_context->lrc_reg_state;
+
+ /* Quick spot check for the common signs of context corruption */
+
+ if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID))
+ return false;
+
+ if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
+ return false;
+
+ return true;
+}
+
+static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+{
+ const unsigned int reset_value = execlists->csb_size - 1;
+
+ /*
+ * After a reset, the HW starts writing into CSB entry [0]. We
+ * therefore have to set our HEAD pointer back one entry so that
+ * the *first* entry we check is entry 0. To complicate this further,
+ * as we don't wait for the first interrupt after reset, we have to
+ * fake the HW write to point back to the last entry so that our
+ * inline comparison of our cached head position against the last HW
+ * write works even before the first interrupt.
+ */
+ execlists->csb_head = reset_value;
+ WRITE_ONCE(*execlists->csb_write, reset_value);
+ wmb(); /* Make sure this is visible to HW (paranoia?) */
+
+ invalidate_csb_entries(&execlists->csb_status[0],
+ &execlists->csb_status[reset_value]);
+}
+
+static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct intel_context *ce;
struct i915_request *rq;
- unsigned long flags;
u32 *regs;
- spin_lock_irqsave(&engine->timeline.lock, flags);
+ process_csb(engine); /* drain preemption events */
+
+ /* Following the reset, we need to reload the CSB read/write pointers */
+ reset_csb_pointers(&engine->execlists);
+
+ /*
+ * Save the currently executing context, even if we completed
+ * its request, it was still running at the time of the
+ * reset and will have been clobbered.
+ */
+ if (!port_isset(execlists->port))
+ goto out_clear;
+
+ ce = port_request(execlists->port)->hw_context;
/*
* Catch up with any missed context-switch interrupts.
@@ -1900,17 +1907,28 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
/* Push back any incomplete requests for replay after the reset. */
rq = __unwind_incomplete_requests(engine);
+ if (!rq)
+ goto out_replay;
- /* Following the reset, we need to reload the CSB read/write pointers */
- reset_csb_pointers(&engine->execlists);
+ if (rq->hw_context != ce) { /* caught just before a CS event */
+ rq = NULL;
+ goto out_replay;
+ }
- GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
- engine->name,
- rq ? rq->global_seqno : 0,
- intel_engine_get_seqno(engine),
- yesno(stalled));
- if (!rq)
- goto out_unlock;
+ /*
+ * If this request hasn't started yet, e.g. it is waiting on a
+ * semaphore, we need to avoid skipping the request or else we
+ * break the signaling chain. However, if the context is corrupt
+ * the request will not restart and we will be stuck with a wedged
+ * device. It is quite often the case that if we issue a reset
+ * while the GPU is loading the context image, that the context
+ * image becomes corrupt.
+ *
+ * Otherwise, if we have not started yet, the request should replay
+ * perfectly and we do not need to flag the result as being erroneous.
+ */
+ if (!i915_request_started(rq) && lrc_regs_ok(rq))
+ goto out_replay;
/*
* If the request was innocent, we leave the request in the ELSP
@@ -1924,8 +1942,8 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
* image back to the expected values to skip over the guilty request.
*/
i915_reset_request(rq, stalled);
- if (!stalled)
- goto out_unlock;
+ if (!stalled && lrc_regs_ok(rq))
+ goto out_replay;
/*
* We want a simple context + ring to execute the breadcrumb update.
@@ -1935,21 +1953,103 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- regs = rq->hw_context->lrc_reg_state;
+ regs = ce->lrc_reg_state;
if (engine->pinned_default_state) {
memcpy(regs, /* skip restoring the vanilla PPHWSP */
engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
engine->context_size - PAGE_SIZE);
}
+ execlists_init_reg_state(regs, ce, engine, ce->ring);
- /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
- rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
- intel_ring_update_space(rq->ring);
+ /* Rerun the request; its payload has been neutered (if guilty). */
+out_replay:
+ ce->ring->head =
+ rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+ intel_ring_update_space(ce->ring);
+ __execlists_update_reg_state(ce, engine);
+
+out_clear:
+ execlists_clear_all_active(execlists);
+}
+
+static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+{
+ unsigned long flags;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ spin_lock_irqsave(&engine->timeline.lock, flags);
- execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
- __execlists_update_reg_state(engine, rq->hw_context);
+ __execlists_reset(engine, stalled);
+
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
+static void nop_submission_tasklet(unsigned long data)
+{
+ /* The driver is wedged; don't process any more events. */
+}
+
+static void execlists_cancel_requests(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *rq, *rn;
+ struct rb_node *rb;
+ unsigned long flags;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * Before we call engine->cancel_requests(), we should have exclusive
+ * access to the submission state. This is arranged for us by the
+ * caller disabling the interrupt generation, the tasklet and other
+ * threads that may then access the same state, giving us a free hand
+ * to reset state. However, we still need to let lockdep be aware that
+ * we know this state may be accessed in hardirq context, so we
+ * disable the irq around this manipulation and we want to keep
+ * the spinlock focused on its duties and not accidentally conflate
+ * coverage to the submission's irq state. (Similarly, although we
+ * shouldn't need to disable irq around the manipulation of the
+ * submission's irq state, we also wish to remind ourselves that
+ * it is irq state.)
+ */
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+
+ __execlists_reset(engine, true);
+
+ /* Mark all executing requests as skipped. */
+ list_for_each_entry(rq, &engine->timeline.requests, link) {
+ if (!i915_request_signaled(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+
+ i915_request_mark_complete(rq);
+ }
+
+ /* Flush the queued requests to the timeline list (for retiring). */
+ while ((rb = rb_first_cached(&execlists->queue))) {
+ struct i915_priolist *p = to_priolist(rb);
+ int i;
+
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ list_del_init(&rq->sched.link);
+ __i915_request_submit(rq);
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+ }
+
+ rb_erase_cached(&p->node, &execlists->queue);
+ i915_priolist_free(p);
+ }
+
+ /* Remaining _unready_ requests will be nop'ed when submitted */
+
+ execlists->queue_priority_hint = INT_MIN;
+ execlists->queue = RB_ROOT_CACHED;
+ GEM_BUG_ON(port_isset(execlists->port));
+
+ GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
+ execlists->tasklet.func = nop_submission_tasklet;
-out_unlock:
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
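The out_replay path above winds the ring head back to the start of the request being replayed (or to the tail when nothing survives), which relies on the ring size being a power of two so that wrapping is a simple mask. A small stand-alone illustration with made-up offsets:

#include <stdio.h>

static unsigned int ring_wrap(unsigned int size, unsigned int pos)
{
	return pos & (size - 1);	/* size must be a power of two */
}

int main(void)
{
	unsigned int size = 4096;	/* illustrative ring size */
	unsigned int rq_head = 5000;	/* offset recorded at request emission */

	printf("replay head = %u\n", ring_wrap(size, rq_head));
	return 0;
}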
@@ -1961,13 +2061,14 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
* After a GPU reset, we may have requests to replay. Do so now while
* we still have the forcewake to be sure that the GPU is not allowed
* to sleep before we restart and reload a context.
- *
*/
GEM_BUG_ON(!reset_in_progress(execlists));
if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
execlists->tasklet.func(execlists->tasklet.data);
- tasklet_enable(&execlists->tasklet);
+ if (__tasklet_enable(&execlists->tasklet))
+ /* And kick in case we missed a new request submission. */
+ tasklet_hi_schedule(&execlists->tasklet);
GEM_TRACE("%s: depth->%d\n", engine->name,
atomic_read(&execlists->tasklet.count));
}
@@ -1978,7 +2079,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
{
u32 *cs;
- cs = intel_ring_begin(rq, 6);
+ cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -1989,19 +2090,37 @@ static int gen8_emit_bb_start(struct i915_request *rq,
* particular all the gen that do not need the w/a at all!), if we
* took care to make sure that on every switch into this context
* (both ordinary and for preemption) that arbitrartion was enabled
- * we would be fine. However, there doesn't seem to be a downside to
- * being paranoid and making sure it is set before each batch and
- * every context-switch.
- *
- * Note that if we fail to enable arbitration before the request
- * is complete, then we do not see the context-switch interrupt and
- * the engine hangs (with RING_HEAD == RING_TAIL).
- *
- * That satisfies both the GPGPU w/a and our heavy-handed paranoia.
+ * we would be fine. However, for gen8 there is another w/a that
+ * requires us to not preempt inside GPGPU execution, so we keep
+ * arbitration disabled for gen8 batches. Arbitration will be
+ * re-enabled before we close the request
+ * (engine->emit_fini_breadcrumb).
*/
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* FIXME(BDW+): Address space and security selectors. */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int gen9_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- /* FIXME(BDW): Address space and security selectors. */
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
*cs++ = lower_32_bits(offset);
@@ -2017,16 +2136,14 @@ static int gen8_emit_bb_start(struct i915_request *rq,
static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
- I915_WRITE_IMR(engine,
- ~(engine->irq_enable_mask | engine->irq_keep_mask));
- POSTING_READ_FW(RING_IMR(engine->mmio_base));
+ ENGINE_WRITE(engine, RING_IMR,
+ ~(engine->irq_enable_mask | engine->irq_keep_mask));
+ ENGINE_POSTING_READ(engine, RING_IMR);
}
static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
- I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
+ ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
}
static int gen8_emit_flush(struct i915_request *request, u32 mode)
@@ -2148,16 +2265,16 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
- /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
- BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
-
cs = gen8_emit_ggtt_write(cs,
request->fence.seqno,
- request->timeline->hwsp_offset);
+ request->timeline->hwsp_offset,
+ 0);
cs = gen8_emit_ggtt_write(cs,
- request->global_seqno,
- intel_hws_seqno_address(request->engine));
+ intel_engine_next_hangcheck_seqno(request->engine),
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ MI_FLUSH_DW_STORE_INDEX);
+
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2180,9 +2297,9 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
PIPE_CONTROL_CS_STALL);
cs = gen8_emit_ggtt_write_rcs(cs,
- request->global_seqno,
- intel_hws_seqno_address(request->engine),
- PIPE_CONTROL_CS_STALL);
+ intel_engine_next_hangcheck_seqno(request->engine),
+ I915_GEM_HWS_HANGCHECK_ADDR,
+ PIPE_CONTROL_STORE_DATA_INDEX);
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2231,7 +2348,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
dev_priv = engine->i915;
if (engine->buffer) {
- WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
+ WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
}
if (engine->cleanup)
@@ -2254,19 +2371,18 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->execlists.tasklet.func = execlists_submission_tasklet;
engine->reset.prepare = execlists_reset_prepare;
+ engine->reset.reset = execlists_reset;
+ engine->reset.finish = execlists_reset_finish;
engine->park = NULL;
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (engine->i915->preempt_context)
+ if (!intel_vgpu_active(engine->i915))
+ engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
+ if (engine->preempt_context &&
+ HAS_LOGICAL_RING_PREEMPTION(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
-
- engine->i915->caps.scheduler =
- I915_SCHEDULER_CAP_ENABLED |
- I915_SCHEDULER_CAP_PRIORITY;
- if (intel_engine_has_preemption(engine))
- engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
}
static void
@@ -2279,7 +2395,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->reset.reset = execlists_reset;
engine->reset.finish = execlists_reset_finish;
- engine->context_pin = execlists_context_pin;
+ engine->cops = &execlists_context_ops;
engine->request_alloc = execlists_request_alloc;
engine->emit_flush = gen8_emit_flush;
@@ -2299,7 +2415,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
* until a more refined solution exists.
*/
}
- engine->emit_bb_start = gen8_emit_bb_start;
+ if (IS_GEN(engine->i915, 8))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen9_emit_bb_start;
}
static inline void
@@ -2309,11 +2428,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) < 11) {
const u8 irq_shifts[] = {
- [RCS] = GEN8_RCS_IRQ_SHIFT,
- [BCS] = GEN8_BCS_IRQ_SHIFT,
- [VCS] = GEN8_VCS1_IRQ_SHIFT,
- [VCS2] = GEN8_VCS2_IRQ_SHIFT,
- [VECS] = GEN8_VECS_IRQ_SHIFT,
+ [RCS0] = GEN8_RCS_IRQ_SHIFT,
+ [BCS0] = GEN8_BCS_IRQ_SHIFT,
+ [VCS0] = GEN8_VCS0_IRQ_SHIFT,
+ [VCS1] = GEN8_VCS1_IRQ_SHIFT,
+ [VECS0] = GEN8_VECS_IRQ_SHIFT,
};
shift = irq_shifts[engine->id];
@@ -2348,6 +2467,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
struct intel_engine_execlists * const execlists = &engine->execlists;
+ u32 base = engine->mmio_base;
int ret;
ret = intel_engine_init_common(engine);
@@ -2357,23 +2477,19 @@ static int logical_ring_init(struct intel_engine_cs *engine)
intel_engine_init_workarounds(engine);
if (HAS_LOGICAL_RING_ELSQ(i915)) {
- execlists->submit_reg = i915->regs +
- i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
- execlists->ctrl_reg = i915->regs +
- i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
+ execlists->submit_reg = i915->uncore.regs +
+ i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
+ execlists->ctrl_reg = i915->uncore.regs +
+ i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
} else {
- execlists->submit_reg = i915->regs +
- i915_mmio_reg_offset(RING_ELSP(engine));
+ execlists->submit_reg = i915->uncore.regs +
+ i915_mmio_reg_offset(RING_ELSP(base));
}
execlists->preempt_complete_status = ~0u;
- if (i915->preempt_context) {
- struct intel_context *ce =
- to_intel_context(i915->preempt_context, engine);
-
+ if (engine->preempt_context)
execlists->preempt_complete_status =
- upper_32_bits(ce->lrc_desc);
- }
+ upper_32_bits(engine->preempt_context->lrc_desc);
execlists->csb_status =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
@@ -2381,6 +2497,11 @@ static int logical_ring_init(struct intel_engine_cs *engine)
execlists->csb_write =
&engine->status_page.addr[intel_hws_csb_write_index(i915)];
+ if (INTEL_GEN(engine->i915) < 11)
+ execlists->csb_size = GEN8_CSB_ENTRIES;
+ else
+ execlists->csb_size = GEN11_CSB_ENTRIES;
+
reset_csb_pointers(execlists);
return 0;
@@ -2592,13 +2713,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
}
static void execlists_init_reg_state(u32 *regs,
- struct i915_gem_context *ctx,
+ struct intel_context *ce,
struct intel_engine_cs *engine,
struct intel_ring *ring)
{
- struct drm_i915_private *dev_priv = engine->i915;
- u32 base = engine->mmio_base;
+ struct i915_hw_ppgtt *ppgtt = ce->gem_context->ppgtt;
bool rcs = engine->class == RENDER_CLASS;
+ u32 base = engine->mmio_base;
/* A context is actually a big batch buffer with several
* MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
@@ -2610,10 +2731,10 @@ static void execlists_init_reg_state(u32 *regs,
regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
+ CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
- if (INTEL_GEN(dev_priv) < 11) {
+ if (INTEL_GEN(engine->i915) < 11) {
regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
@@ -2659,42 +2780,42 @@ static void execlists_init_reg_state(u32 *regs,
CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
-
- if (i915_vm_is_48bit(&ctx->ppgtt->vm)) {
+ CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
+ CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
+ CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
+ CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
+ CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
+ CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
+ CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
+ CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
* PDP0_DESCRIPTOR contains the base address to PML4 and
* other PDP Descriptors are ignored.
*/
- ASSIGN_CTX_PML4(ctx->ppgtt, regs);
+ ASSIGN_CTX_PML4(ppgtt, regs);
} else {
- ASSIGN_CTX_PDP(ctx->ppgtt, regs, 3);
- ASSIGN_CTX_PDP(ctx->ppgtt, regs, 2);
- ASSIGN_CTX_PDP(ctx->ppgtt, regs, 1);
- ASSIGN_CTX_PDP(ctx->ppgtt, regs, 0);
+ ASSIGN_CTX_PDP(ppgtt, regs, 3);
+ ASSIGN_CTX_PDP(ppgtt, regs, 2);
+ ASSIGN_CTX_PDP(ppgtt, regs, 1);
+ ASSIGN_CTX_PDP(ppgtt, regs, 0);
}
if (rcs) {
regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
- i915_oa_init_reg_state(engine, ctx, regs);
+ i915_oa_init_reg_state(engine, ce, regs);
}
regs[CTX_END] = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(dev_priv) >= 10)
+ if (INTEL_GEN(engine->i915) >= 10)
regs[CTX_END] |= BIT(0);
}
static int
-populate_lr_context(struct i915_gem_context *ctx,
+populate_lr_context(struct intel_context *ce,
struct drm_i915_gem_object *ctx_obj,
struct intel_engine_cs *engine,
struct intel_ring *ring)
@@ -2703,19 +2824,12 @@ populate_lr_context(struct i915_gem_context *ctx,
u32 *regs;
int ret;
- ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
- if (ret) {
- DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
- return ret;
- }
-
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
return ret;
}
- ctx_obj->mm.dirty = true;
if (engine->default_state) {
/*
@@ -2740,23 +2854,35 @@ populate_lr_context(struct i915_gem_context *ctx,
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ctx, engine, ring);
+ execlists_init_reg_state(regs, ce, engine, ring);
if (!engine->default_state)
regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
- if (ctx == ctx->i915->preempt_context && INTEL_GEN(engine->i915) < 11)
+ if (ce->gem_context == engine->i915->preempt_context &&
+ INTEL_GEN(engine->i915) < 11)
regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
+ ret = 0;
err_unpin_ctx:
+ __i915_gem_object_flush_map(ctx_obj,
+ LRC_HEADER_PAGES * PAGE_SIZE,
+ engine->context_size);
i915_gem_object_unpin_map(ctx_obj);
return ret;
}
-static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct intel_context *ce)
+static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
+{
+ if (ctx->timeline)
+ return i915_timeline_get(ctx->timeline);
+ else
+ return i915_timeline_create(ctx->i915, NULL);
+}
+
+static int execlists_context_deferred_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *ctx_obj;
struct i915_vma *vma;
@@ -2776,30 +2902,32 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
*/
context_size += LRC_HEADER_PAGES * PAGE_SIZE;
- ctx_obj = i915_gem_object_create(ctx->i915, context_size);
+ ctx_obj = i915_gem_object_create(engine->i915, context_size);
if (IS_ERR(ctx_obj))
return PTR_ERR(ctx_obj);
- vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto error_deref_obj;
}
- timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
+ timeline = get_timeline(ce->gem_context);
if (IS_ERR(timeline)) {
ret = PTR_ERR(timeline);
goto error_deref_obj;
}
- ring = intel_engine_create_ring(engine, timeline, ctx->ring_size);
+ ring = intel_engine_create_ring(engine,
+ timeline,
+ ce->gem_context->ring_size);
i915_timeline_put(timeline);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error_deref_obj;
}
- ret = populate_lr_context(ctx, ctx_obj, engine, ring);
+ ret = populate_lr_context(ce, ctx_obj, engine, ring);
if (ret) {
DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
goto error_ring_free;
@@ -2811,45 +2939,12 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
return 0;
error_ring_free:
- intel_ring_free(ring);
+ intel_ring_put(ring);
error_deref_obj:
i915_gem_object_put(ctx_obj);
return ret;
}
-void intel_lr_context_resume(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
- enum intel_engine_id id;
-
- /*
- * Because we emit WA_TAIL_DWORDS there may be a disparity
- * between our bookkeeping in ce->ring->head and ce->ring->tail and
- * that stored in context. As we only write new commands from
- * ce->ring->tail onwards, everything before that is junk. If the GPU
- * starts reading from its RING_HEAD from the context, it may try to
- * execute that junk and die.
- *
- * So to avoid that we reset the context images upon resume. For
- * simplicity, we just zero everything out.
- */
- list_for_each_entry(ctx, &i915->contexts.list, link) {
- for_each_engine(engine, i915, id) {
- struct intel_context *ce =
- to_intel_context(ctx, engine);
-
- if (!ce->state)
- continue;
-
- intel_ring_reset(ce->ring, 0);
-
- if (ce->pin_count) /* otherwise done in context_pin */
- __execlists_update_reg_state(engine, ce);
- }
- }
-}
-
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
@@ -2910,6 +3005,37 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
+void intel_lr_context_reset(struct intel_engine_cs *engine,
+ struct intel_context *ce,
+ u32 head,
+ bool scrub)
+{
+ /*
+ * We want a simple context + ring to execute the breadcrumb update.
+ * We cannot rely on the context being intact across the GPU hang,
+ * so clear it and rebuild just what we need for the breadcrumb.
+ * All pending requests for this context will be zapped, and any
+ * future request will be after userspace has had the opportunity
+ * to recreate its own state.
+ */
+ if (scrub) {
+ u32 *regs = ce->lrc_reg_state;
+
+ if (engine->pinned_default_state) {
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+ }
+ execlists_init_reg_state(regs, ce, engine, ce->ring);
+ }
+
+ /* Rerun the request; its payload has been neutered (if guilty). */
+ ce->ring->head = head;
+ intel_ring_update_space(ce->ring);
+
+ __execlists_update_reg_state(ce, engine);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_lrc.c"
#endif
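The new intel_lr_context_reset() exposes the same scrub-and-rewind treatment that __execlists_reset() performs inline, so another submission backend can reuse it after a reset. A hypothetical caller, purely for illustration:

/*
 * Hypothetical caller: after an engine reset where the context image may
 * have been clobbered, drop everything in the ring and rebuild the
 * register state from scratch. Only a sketch of the API above.
 */
static void example_scrub_context(struct intel_engine_cs *engine,
				  struct intel_context *ce)
{
	intel_lr_context_reset(engine, ce, 0 /* head */, true /* scrub */);
}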