From 3204c343bb691c42a5e568cbd2a9ec9b2b5703c0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 27 Apr 2017 11:46:51 +0100 Subject: [PATCH] drm/i915: Defer context state allocation for legacy ring submission Almost from the outset for execlists, we used deferred allocation of the logical context and rings. Then we ported the infrastructure for pinning contexts back to legacy, and so now we are able to also implement deferred allocation for context objects prior to first use on the legacy submission. v2: We still need to differentiate between legacy engines, Joonas is fixing that but I want this first ;) (Joonas) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170427104651.22394-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_context.c | 59 ------------------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 50 +++++++++++++++++++++ 2 files changed, 50 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c4966913..d46a69d3d390 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -151,45 +151,6 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } -static struct drm_i915_gem_object * -alloc_context_obj(struct drm_i915_private *dev_priv, u64 size) -{ - struct drm_i915_gem_object *obj; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return obj; - - /* - * Try to make the context utilize L3 as well as LLC. - * - * On VLV we don't have L3 controls in the PTEs so we - * shouldn't touch the cache level, especially as that - * would make the object snooped which might have a - * negative performance impact. - * - * Snooping is required on non-llc platforms in execlist - * mode, but since all GGTT accesses use PAT entry 0 we - * get snooping anyway regardless of cache_level. - * - * This is only applicable for Ivy Bridge devices since - * later platforms don't have L3 control bits in the PTE. - */ - if (IS_IVYBRIDGE(dev_priv)) { - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); - /* Failure shouldn't ever happen this early */ - if (WARN_ON(ret)) { - i915_gem_object_put(obj); - return ERR_PTR(ret); - } - } - - return obj; -} - static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); @@ -266,26 +227,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - if (dev_priv->hw_context_size) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto err_out; - } - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - ret = PTR_ERR(vma); - goto err_out; - } - - ctx->engine[RCS].state = vma; - } - /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; if (file_priv) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6836efb7e3d2..61f612454ce7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1437,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx) PIN_GLOBAL | PIN_HIGH); } +static struct i915_vma * +alloc_context_vma(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create(i915, i915->hw_context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Try to make the context utilize L3 as well as LLC. + * + * On VLV we don't have L3 controls in the PTEs so we + * shouldn't touch the cache level, especially as that + * would make the object snooped which might have a + * negative performance impact. + * + * Snooping is required on non-llc platforms in execlist + * mode, but since all GGTT accesses use PAT entry 0 we + * get snooping anyway regardless of cache_level. + * + * This is only applicable for Ivy Bridge devices since + * later platforms don't have L3 control bits in the PTE. + */ + if (IS_IVYBRIDGE(i915)) { + /* Ignore any error, regard it as a simple optimisation */ + i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); + } + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + static int intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { @@ -1449,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ + if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) { + struct i915_vma *vma; + + vma = alloc_context_vma(engine); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error; + } + + ce->state = vma; + } + if (ce->state) { ret = context_pin(ctx); if (ret)