drm/i915: add infrastructure to hold off preemption on a request

We want to set this flag in the next commit on requests containing perf queries so that the result of the perf query can just be a delta of global counters, rather than doing post processing of the OA buffer. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> [ickle: add basic selftest for nopreempt] Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190709164227.25859-1-chris@chris-wilson.co.uk
2019-07-09 17:42:27 +01:00 · 2019-07-09 17:42:27 +01:00 · 2a98f4e65b
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@ -258,6 +258,17 @@ static int effective_prio(const struct i915_request *rq)
 {
 	int prio = rq_prio(rq);

+	/*
+	 * If this request is special and must not be interrupted at any
+	 * cost, so be it. Note we are only checking the most recent request
+	 * in the context and so may be masking an earlier vip request. It
+	 * is hoped that under the conditions where nopreempt is used, this
+	 * will not matter (i.e. all requests to that context will be
+	 * nopreempt for as long as desired).
+	 */
+	if (i915_request_has_nopreempt(rq))
+		prio = I915_PRIORITY_UNPREEMPTABLE;
+
 	/*
 	 * On unwinding the active request, we give it a priority bump
 	 * if it has completed waiting on any semaphore. If we know that
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@ -721,6 +721,114 @@ static void preempt_client_fini(struct preempt_client *c)
 	kernel_context_close(c->ctx);
 }

+static int live_nopreempt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct preempt_client a, b;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	int err = -ENOMEM;
+
+	/*
+	 * Verify that we can disable preemption for an individual request
+	 * that may be being observed and not want to be interrupted.
+	 */
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	if (preempt_client_init(i915, &a))
+		goto err_unlock;
+	if (preempt_client_init(i915, &b))
+		goto err_client_a;
+	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq_a, *rq_b;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		engine->execlists.preempt_hang.count = 0;
+
+		rq_a = igt_spinner_create_request(&a.spin,
+						  a.ctx, engine,
+						  MI_ARB_CHECK);
+		if (IS_ERR(rq_a)) {
+			err = PTR_ERR(rq_a);
+			goto err_client_b;
+		}
+
+		/* Low priority client, but unpreemptable! */
+		rq_a->flags |= I915_REQUEST_NOPREEMPT;
+
+		i915_request_add(rq_a);
+		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
+			pr_err("First client failed to start\n");
+			goto err_wedged;
+		}
+
+		rq_b = igt_spinner_create_request(&b.spin,
+						  b.ctx, engine,
+						  MI_ARB_CHECK);
+		if (IS_ERR(rq_b)) {
+			err = PTR_ERR(rq_b);
+			goto err_client_b;
+		}
+
+		i915_request_add(rq_b);
+
+		/* B is much more important than A! (But A is unpreemptable.) */
+		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
+
+		/* Wait long enough for preemption and timeslicing */
+		if (igt_wait_for_spinner(&b.spin, rq_b)) {
+			pr_err("Second client started too early!\n");
+			goto err_wedged;
+		}
+
+		igt_spinner_end(&a.spin);
+
+		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
+			pr_err("Second client failed to start\n");
+			goto err_wedged;
+		}
+
+		igt_spinner_end(&b.spin);
+
+		if (engine->execlists.preempt_hang.count) {
+			pr_err("Preemption recorded x%d; should have been suppressed!\n",
+			       engine->execlists.preempt_hang.count);
+			err = -EINVAL;
+			goto err_wedged;
+		}
+
+		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			goto err_wedged;
+	}
+
+	err = 0;
+err_client_b:
+	preempt_client_fini(&b);
+err_client_a:
+	preempt_client_fini(&a);
+err_unlock:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+
+err_wedged:
+	igt_spinner_end(&b.spin);
+	igt_spinner_end(&a.spin);
+	i915_gem_set_wedged(i915);
+	err = -EIO;
+	goto err_client_b;
+}
+
 static int live_suppress_self_preempt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@ -2028,6 +2136,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
+		SUBTEST(live_nopreempt),
 		SUBTEST(live_suppress_self_preempt),
 		SUBTEST(live_suppress_wait_preempt),
 		SUBTEST(live_chain_preempt),
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@ -17,6 +17,16 @@ enum {
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
 	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,

+	/*
+	 * Requests containing performance queries must not be preempted by
+	 * another context. They get scheduled with their default priority and
+	 * once they reach the execlist ports we ensure that they stick on the
+	 * HW until finished by pretending that they have maximum priority,
+	 * i.e. nothing can have higher priority and force us to usurp the
+	 * active request.
+	 */
+	I915_PRIORITY_UNPREEMPTABLE = INT_MAX,
+
 	I915_PRIORITY_INVALID = INT_MIN
 };

--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@ -292,7 +292,7 @@ static bool i915_request_retire(struct i915_request *rq)
 		dma_fence_signal_locked(&rq->fence);
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
 		i915_request_cancel_breadcrumb(rq);
-	if (rq->waitboost) {
+	if (i915_request_has_waitboost(rq)) {
 		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
 		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
 	}
@ -684,7 +684,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->file_priv = NULL;
 	rq->batch = NULL;
 	rq->capture_list = NULL;
-	rq->waitboost = false;
+	rq->flags = 0;
 	rq->execution_mask = ALL_ENGINES;

 	INIT_LIST_HEAD(&rq->active_list);
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@ -216,7 +216,9 @@ struct i915_request {
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;

-	bool waitboost;
+	unsigned long flags;
+#define I915_REQUEST_WAITBOOST BIT(0)
+#define I915_REQUEST_NOPREEMPT BIT(1)

 	/** timeline->request entry for this request */
 	struct list_head link;
@ -430,6 +432,17 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
 }

+static inline bool i915_request_has_waitboost(const struct i915_request *rq)
+{
+	return rq->flags & I915_REQUEST_WAITBOOST;
+}
+
+static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
+{
+	/* Preemption should only be disabled very rarely */
+	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
+}
+
 bool i915_retire_requests(struct drm_i915_private *i915);

 #endif /* I915_REQUEST_H */
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@ -707,6 +707,16 @@ static inline int rq_prio(const struct i915_request *rq)
 	return rq->sched.attr.priority | __NO_PREEMPTION;
 }

+static inline int effective_prio(const struct i915_request *rq)
+{
+	int prio = rq_prio(rq);
+
+	if (i915_request_has_nopreempt(rq))
+		prio = I915_PRIORITY_UNPREEMPTABLE;
+
+	return prio;
+}
+
 static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 {
 	trace_i915_request_in(rq, idx);
@ -747,7 +757,8 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;

-			if (i915_scheduler_need_preempt(prio, rq_prio(last))) {
+			if (i915_scheduler_need_preempt(prio,
+							effective_prio(last))) {
 				intel_write_status_page(engine,
 							I915_GEM_HWS_PREEMPT,
 							GUC_PREEMPT_INPROGRESS);
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@ -6876,9 +6876,10 @@ void gen6_rps_boost(struct i915_request *rq)
 	/* Serializes with i915_request_retire() */
 	boost = false;
 	spin_lock_irqsave(&rq->lock, flags);
-	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
+	if (!i915_request_has_waitboost(rq) &&
+	    !dma_fence_is_signaled_locked(&rq->fence)) {
 		boost = !atomic_fetch_inc(&rps->num_waiters);
-		rq->waitboost = true;
+		rq->flags |= I915_REQUEST_WAITBOOST;
 	}
 	spin_unlock_irqrestore(&rq->lock, flags);
 	if (!boost)