drm/i915/gtt: Downgrade gen7 (ivb, byt, hsw) back to aliasing-ppgtt

With the upcoming change in timing (dramatically reducing the latency between manipulating the ppGTT and execution), no amount of tweaking could save Baytrail, it would always fail to invalidate its TLB. Ville was right, Baytrail is beyond hope. v2: Rollback on all gen7; same timing instability on TLB invalidation. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Acked-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190830180000.24608-1-chris@chris-wilson.co.uk
2019-08-30 18:59:58 +01:00 · 2019-08-30 18:59:58 +01:00 · 3dc007fe9b
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@ -1741,46 +1741,22 @@ static int remap_l3(struct i915_request *rq)

 static int switch_context(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
-	struct i915_address_space *vm = vm_alias(rq->hw_context);
-	unsigned int unwind_mm = 0;
-	u32 hw_flags = 0;
+	struct intel_context *ce = rq->hw_context;
+	struct i915_address_space *vm = vm_alias(ce);
 	int ret;

 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));

 	if (vm) {
-		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-		int loops;
-
-		/*
-		 * Baytail takes a little more convincing that it really needs
-		 * to reload the PD between contexts. It is not just a little
-		 * longer, as adding more stalls after the load_pd_dir (i.e.
-		 * adding a long loop around flush_pd_dir) is not as effective
-		 * as reloading the PD umpteen times. 32 is derived from
-		 * experimentation (gem_exec_parallel/fds) and has no good
-		 * explanation.
-		 */
-		loops = 1;
-		if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
-			loops = 32;
-
-		do {
-			ret = load_pd_dir(rq, ppgtt);
-			if (ret)
-				goto err;
-		} while (--loops);
-
-		if (ppgtt->pd_dirty_engines & engine->mask) {
-			unwind_mm = engine->mask;
-			ppgtt->pd_dirty_engines &= ~unwind_mm;
-			hw_flags = MI_FORCE_RESTORE;
-		}
+		ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm));
+		if (ret)
+			return ret;
 	}

-	if (rq->hw_context->state) {
-		GEM_BUG_ON(engine->id != RCS0);
+	if (ce->state) {
+		u32 hw_flags;
+
+		GEM_BUG_ON(rq->engine->id != RCS0);

 		/*
 		 * The kernel context(s) is treated as pure scratch and is not
@ -1789,22 +1765,25 @@ static int switch_context(struct i915_request *rq)
 		 * as nothing actually executes using the kernel context; it
 		 * is purely used for flushing user contexts.
 		 */
+		hw_flags = 0;
 		if (i915_gem_context_is_kernel(rq->gem_context))
 			hw_flags = MI_RESTORE_INHIBIT;

 		ret = mi_set_context(rq, hw_flags);
 		if (ret)
-			goto err_mm;
+			return ret;
 	}

 	if (vm) {
+		struct intel_engine_cs *engine = rq->engine;
+
 		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
-			goto err_mm;
+			return ret;

 		ret = flush_pd_dir(rq);
 		if (ret)
-			goto err_mm;
+			return ret;

 		/*
 		 * Not only do we need a full barrier (post-sync write) after
@ -1816,24 +1795,18 @@ static int switch_context(struct i915_request *rq)
 		 */
 		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
-			goto err_mm;
+			return ret;

 		ret = engine->emit_flush(rq, EMIT_FLUSH);
 		if (ret)
-			goto err_mm;
+			return ret;
 	}

 	ret = remap_l3(rq);
 	if (ret)
-		goto err_mm;
+		return ret;

 	return 0;
-
-err_mm:
-	if (unwind_mm)
-		i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
-err:
-	return ret;
 }

 static int ring_request_alloc(struct i915_request *request)
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@ -420,7 +420,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
 	.has_rps = true, \
-	.ppgtt_type = INTEL_PPGTT_FULL, \
+	.ppgtt_type = INTEL_PPGTT_ALIASING, \
 	.ppgtt_size = 31, \
 	IVB_PIPE_OFFSETS, \
 	IVB_CURSOR_OFFSETS, \
@ -476,7 +476,7 @@ static const struct intel_device_info intel_valleyview_info = {
 	.has_rps = true,
 	.display.has_gmch = 1,
 	.display.has_hotplug = 1,
-	.ppgtt_type = INTEL_PPGTT_FULL,
+	.ppgtt_type = INTEL_PPGTT_ALIASING,
 	.ppgtt_size = 31,
 	.has_snoop = true,
 	.has_coherent_ggtt = false,