drm/i915: Prepare object synchronisation for asynchronicity
We are about to specialize object synchronisation to enable nonblocking
execbuf submission. First we make a copy of the current object
synchronisation for execbuffer. The general i915_gem_object_sync() will
be removed following the removal of CS flips in the near future.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160909131201.16673-16-chris@chris-wilson.co.uk
Parent: 0f25dff6e9
Commit: a2bc4695bb
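For orientation before the hunks: the conversion at each call site has the shape below. This is a minimal sketch distilled from the execbuffer and page-flip hunks in this diff; req and obj stand for the request and object the caller already holds.

	/* Before: synchronisation is expressed as an operation on the object. */
	ret = i915_gem_object_sync(obj, req);
	if (ret)
		return ret;

	/* After: the request awaits the object, and the caller states
	 * explicitly whether it intends to write to it.
	 */
	ret = i915_gem_request_await_object(req, obj,
					    obj->base.pending_write_domain);
	if (ret)
		return ret;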
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3221,8 +3221,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req,
 			     unsigned int flags);
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2818,97 +2818,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	return ret;
 }
 
-static int
-__i915_gem_object_sync(struct drm_i915_gem_request *to,
-		       struct drm_i915_gem_request *from)
-{
-	int ret;
-
-	if (to->engine == from->engine)
-		return 0;
-
-	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					from->i915->mm.interruptible |
-					I915_WAIT_LOCKED,
-					NULL,
-					NO_WAITBOOST);
-		if (ret)
-			return ret;
-	} else {
-		int idx = intel_engine_sync_index(from->engine, to->engine);
-		if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
-			return 0;
-
-		trace_i915_gem_ring_sync_to(to, from);
-		ret = to->engine->semaphore.sync_to(to, from);
-		if (ret)
-			return ret;
-
-		from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
-	}
-
-	return 0;
-}
-
-/**
- * i915_gem_object_sync - sync an object to a ring.
- *
- * @obj: object which may be in use on another ring.
- * @to: request we are wishing to use
- *
- * This code is meant to abstract object synchronization with the GPU.
- * Conceptually we serialise writes between engines inside the GPU.
- * We only allow one engine to write into a buffer at any time, but
- * multiple readers. To ensure each has a coherent view of memory, we must:
- *
- * - If there is an outstanding write request to the object, the new
- *   request must wait for it to complete (either CPU or in hw, requests
- *   on the same ring will be naturally ordered).
- *
- * - If we are a write request (pending_write_domain is set), the new
- *   request must wait for outstanding read requests to complete.
- *
- * Returns 0 if successful, else propagates up the lower layer error.
- */
-int
-i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct drm_i915_gem_request *to)
-{
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	active_mask = i915_gem_object_get_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (obj->base.pending_write_domain) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
-
-		ret = __i915_gem_object_sync(to, request);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,7 +1133,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->flags & other_rings) {
-			ret = i915_gem_object_sync(obj, req);
+			ret = i915_gem_request_await_object
+				(req, obj, obj->base.pending_write_domain);
 			if (ret)
 				return ret;
 		}
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -460,6 +460,93 @@ err:
 	return ERR_PTR(ret);
 }
 
+static int
+i915_gem_request_await_request(struct drm_i915_gem_request *to,
+			       struct drm_i915_gem_request *from)
+{
+	int idx, ret;
+
+	GEM_BUG_ON(to == from);
+
+	if (to->engine == from->engine)
+		return 0;
+
+	idx = intel_engine_sync_index(from->engine, to->engine);
+	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+		return 0;
+
+	trace_i915_gem_ring_sync_to(to, from);
+	if (!i915.semaphores) {
+		ret = i915_wait_request(from,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED,
+					NULL, NO_WAITBOOST);
+		if (ret)
+			return ret;
+	} else {
+		ret = to->engine->semaphore.sync_to(to, from);
+		if (ret)
+			return ret;
+	}
+
+	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+	return 0;
+}
+
+/**
+ * i915_gem_request_await_object - set this request to (async) wait upon a bo
+ *
+ * @to: request we are wishing to use
+ * @obj: object which may be in use on another ring.
+ *
+ * This code is meant to abstract object synchronization with the GPU.
+ * Conceptually we serialise writes between engines inside the GPU.
+ * We only allow one engine to write into a buffer at any time, but
+ * multiple readers. To ensure each has a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ *   request must wait for it to complete (either CPU or in hw, requests
+ *   on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ *   request must wait for outstanding read requests to complete.
+ *
+ * Returns 0 if successful, else propagates up the lower layer error.
+ */
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write)
+{
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	if (write) {
+		active_mask = i915_gem_object_get_active(obj);
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		struct drm_i915_gem_request *request;
+		int ret;
+
+		request = i915_gem_active_peek(&active[idx],
+					       &obj->base.dev->struct_mutex);
+		if (!request)
+			continue;
+
+		ret = i915_gem_request_await_request(to, request);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -209,6 +209,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write);
+
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
 	__i915_add_request(req, true)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12273,7 +12273,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		goto cleanup_unpin;
 	}
 
-	ret = i915_gem_object_sync(obj, request);
+	ret = i915_gem_request_await_object(request, obj, false);
 	if (ret)
 		goto cleanup_request;
 
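As a usage note, the display hunk passes write = false because the page flip only reads from the framebuffer object. A rough, hypothetical sketch of a read-only caller of the new interface follows; the surrounding engine, ctx and obj variables, the out_request label and the request-allocation step are assumptions added here for illustration, not part of this patch.

	struct drm_i915_gem_request *req;
	int ret;

	/* Allocate a request on the engine that will consume the object
	 * (assumed caller context; not taken from this patch).
	 */
	req = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* Queue an (async) wait upon whichever engines still use the object;
	 * write = false because the commands below only read from it.
	 */
	ret = i915_gem_request_await_object(req, obj, false);
	if (ret)
		goto out_request;

	/* ... emit commands that read from obj ... */

out_request:
	i915_add_request(req);
	return ret;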