drm/i915: Pull i915_gem_active into the i915_active family
Looking forward, we need to break the struct_mutex dependency on
i915_gem_active. In the meantime, external use of i915_gem_active is quite
beguiling: little do new users suspect that it implies a barrier, as each
request it tracks must be ordered wrt the previous one. i915_active, as one
of many, can instead be used to track activity across multiple timelines (a
shared fence), which fits our unordered request submission much better. We
need to steer external users away from the singular, exclusive fence imposed
by i915_gem_active and towards i915_active instead.

As part of that process, we move i915_gem_active out of i915_request.c into
i915_active.c to start separating the two concepts, and rename it to
i915_active_request (both to tie it to the concept of tracking just one
request, and to give it a longer, less appealing name).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190205130005.2807-5-chris@chris-wilson.co.uk
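For a sense of what changes for users, here is a minimal sketch of
single-request tracking with the renamed API, based only on the interfaces
introduced in this patch; the my_obj structure and my_retire() callback are
hypothetical placeholders, and the caller is assumed to hold struct_mutex.

	/*
	 * Sketch only: my_obj and my_retire() are invented for
	 * illustration; the i915_active_request_* calls are the ones
	 * added by this patch.
	 */
	struct my_obj {
		struct i915_active_request last;
	};

	static void my_retire(struct i915_active_request *active,
			      struct i915_request *rq)
	{
		/* invoked once the tracked request is retired (tracker idle) */
	}

	static void my_obj_init(struct my_obj *obj)
	{
		i915_active_request_init(&obj->last, NULL, my_retire);
	}

	static int my_obj_move_to_active(struct my_obj *obj,
					 struct i915_request *rq)
	{
		/*
		 * This is the implicit barrier the message above warns
		 * about: i915_active_request_set() first awaits the
		 * previously tracked request, so every request placed in
		 * the slot is ordered after its predecessor.
		 */
		return i915_active_request_set(&obj->last, rq);
	}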
Parent: 5f5c139d69
Commit: 21950ee7cc
drivers/gpu/drm/i915/i915_active.c

@@ -21,7 +21,7 @@ static struct i915_global_active {
 } global;
 
 struct active_node {
-	struct i915_gem_active base;
+	struct i915_active_request base;
 	struct i915_active *ref;
 	struct rb_node node;
 	u64 timeline;
@@ -33,7 +33,7 @@ __active_park(struct i915_active *ref)
 	struct active_node *it, *n;
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		GEM_BUG_ON(i915_gem_active_isset(&it->base));
+		GEM_BUG_ON(i915_active_request_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
 	}
 	ref->tree = RB_ROOT;
@@ -53,18 +53,18 @@ __active_retire(struct i915_active *ref)
 }
 
 static void
-node_retire(struct i915_gem_active *base, struct i915_request *rq)
+node_retire(struct i915_active_request *base, struct i915_request *rq)
 {
 	__active_retire(container_of(base, struct active_node, base)->ref);
 }
 
 static void
-last_retire(struct i915_gem_active *base, struct i915_request *rq)
+last_retire(struct i915_active_request *base, struct i915_request *rq)
 {
 	__active_retire(container_of(base, struct i915_active, last));
 }
 
-static struct i915_gem_active *
+static struct i915_active_request *
 active_instance(struct i915_active *ref, u64 idx)
 {
 	struct active_node *node;
@@ -85,7 +85,7 @@ active_instance(struct i915_active *ref, u64 idx)
 	 * twice for the same timeline (as the older rbtree element will be
 	 * retired before the new request added to last).
 	 */
-	old = i915_gem_active_raw(&ref->last, BKL(ref));
+	old = i915_active_request_raw(&ref->last, BKL(ref));
 	if (!old || old->fence.context == idx)
 		goto out;
 
@@ -110,7 +110,7 @@ active_instance(struct i915_active *ref, u64 idx)
 	node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
 
 	/* kmalloc may retire the ref->last (thanks shrinker)! */
-	if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
+	if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
 		kmem_cache_free(global.slab_cache, node);
 		goto out;
 	}
@@ -118,7 +118,7 @@ active_instance(struct i915_active *ref, u64 idx)
 	if (unlikely(!node))
 		return ERR_PTR(-ENOMEM);
 
-	init_request_active(&node->base, node_retire);
+	i915_active_request_init(&node->base, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -133,7 +133,7 @@ replace:
 	 * callback not two, and so must undo the active counting for the
 	 * overwritten slot.
 	 */
-	if (i915_gem_active_isset(&node->base)) {
+	if (i915_active_request_isset(&node->base)) {
 		/* Retire ourselves from the old rq->active_list */
 		__list_del_entry(&node->base.link);
 		ref->count--;
@@ -154,7 +154,7 @@ void i915_active_init(struct drm_i915_private *i915,
 	ref->i915 = i915;
 	ref->retire = retire;
 	ref->tree = RB_ROOT;
-	init_request_active(&ref->last, last_retire);
+	i915_active_request_init(&ref->last, NULL, last_retire);
 	ref->count = 0;
 }
 
@@ -162,15 +162,15 @@ int i915_active_ref(struct i915_active *ref,
 		    u64 timeline,
 		    struct i915_request *rq)
 {
-	struct i915_gem_active *active;
+	struct i915_active_request *active;
 
 	active = active_instance(ref, timeline);
 	if (IS_ERR(active))
 		return PTR_ERR(active);
 
-	if (!i915_gem_active_isset(active))
+	if (!i915_active_request_isset(active))
 		ref->count++;
-	i915_gem_active_set(active, rq);
+	__i915_active_request_set(active, rq);
 
 	GEM_BUG_ON(!ref->count);
 	return 0;
@@ -196,12 +196,12 @@ int i915_active_wait(struct i915_active *ref)
 	if (i915_active_acquire(ref))
 		goto out_release;
 
-	ret = i915_gem_active_retire(&ref->last, BKL(ref));
+	ret = i915_active_request_retire(&ref->last, BKL(ref));
 	if (ret)
 		goto out_release;
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		ret = i915_gem_active_retire(&it->base, BKL(ref));
+		ret = i915_active_request_retire(&it->base, BKL(ref));
 		if (ret)
 			break;
 	}
@@ -211,11 +211,11 @@ out_release:
 	return ret;
 }
 
-static int __i915_request_await_active(struct i915_request *rq,
-				       struct i915_gem_active *active)
+int i915_request_await_active_request(struct i915_request *rq,
+				      struct i915_active_request *active)
 {
 	struct i915_request *barrier =
-		i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
+		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
 
 	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
 }
@@ -225,12 +225,12 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 	struct active_node *it, *n;
 	int ret;
 
-	ret = __i915_request_await_active(rq, &ref->last);
+	ret = i915_request_await_active_request(rq, &ref->last);
 	if (ret)
 		return ret;
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		ret = __i915_request_await_active(rq, &it->base);
+		ret = i915_request_await_active_request(rq, &it->base);
 		if (ret)
 			return ret;
 	}
@@ -241,12 +241,32 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
-	GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+	GEM_BUG_ON(i915_active_request_isset(&ref->last));
 	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	GEM_BUG_ON(ref->count);
 }
 #endif
 
+int i915_active_request_set(struct i915_active_request *active,
+			    struct i915_request *rq)
+{
+	int err;
+
+	/* Must maintain ordering wrt previous active requests */
+	err = i915_request_await_active_request(rq, active);
+	if (err)
+		return err;
+
+	__i915_active_request_set(active, rq);
+	return 0;
+}
+
+void i915_active_retire_noop(struct i915_active_request *active,
+			     struct i915_request *request)
+{
+	/* Space left intentionally blank */
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_active.c"
 #endif
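On the i915_active (shared fence) side that external users are being steered
towards, the entry points above combine roughly as follows; a sketch assuming
struct_mutex is held, with track_activity()/sync_activity() as hypothetical
wrappers around the i915_active_ref() and i915_active_wait() calls shown in
the diff.

	/*
	 * One slot per timeline: requests from different timelines are
	 * tracked side by side, with no ordering imposed between them.
	 */
	static int track_activity(struct i915_active *ref,
				  struct i915_request *rq)
	{
		/* index the tracker by the request's own timeline */
		return i915_active_ref(ref, rq->fence.context, rq);
	}

	static int sync_activity(struct i915_active *ref)
	{
		/* waits for the last request on every tracked timeline */
		return i915_active_wait(ref);
	}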
drivers/gpu/drm/i915/i915_active.h

@@ -7,7 +7,354 @@
 #ifndef _I915_ACTIVE_H_
 #define _I915_ACTIVE_H_
 
+#include <linux/lockdep.h>
+
 #include "i915_active_types.h"
+#include "i915_request.h"
+
+/*
+ * We treat requests as fences. This is not to be confused with our
+ * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
+ * We use the fences to synchronize access from the CPU with activity on the
+ * GPU, for example, we should not rewrite an object's PTE whilst the GPU
+ * is reading them. We also track fences at a higher level to provide
+ * implicit synchronisation around GEM objects, e.g. set-domain will wait
+ * for outstanding GPU rendering before marking the object ready for CPU
+ * access, or a pageflip will wait until the GPU is complete before showing
+ * the frame on the scanout.
+ *
+ * In order to use a fence, the object must track the fence it needs to
+ * serialise with. For example, GEM objects want to track both read and
+ * write access so that we can perform concurrent read operations between
+ * the CPU and GPU engines, as well as waiting for all rendering to
+ * complete, or waiting for the last GPU user of a "fence register". The
+ * object then embeds a #i915_active_request to track the most recent (in
+ * retirement order) request relevant for the desired mode of access.
+ * The #i915_active_request is updated with i915_active_request_set() to
+ * track the most recent fence request, typically this is done as part of
+ * i915_vma_move_to_active().
+ *
+ * When the #i915_active_request completes (is retired), it will
+ * signal its completion to the owner through a callback as well as mark
+ * itself as idle (i915_active_request.request == NULL). The owner
+ * can then perform any action, such as delayed freeing of an active
+ * resource including itself.
+ */
+
+void i915_active_retire_noop(struct i915_active_request *active,
+			     struct i915_request *request);
+
+/**
+ * i915_active_request_init - prepares the activity tracker for use
+ * @active - the active tracker
+ * @rq - initial request to track, can be NULL
+ * @retire - a callback invoked when the tracker is retired (becomes idle),
+ *           can be NULL
+ *
+ * i915_active_request_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active request
+ * associated with it. When the last request becomes idle, when it is retired
+ * after completion, the optional callback @retire is invoked.
+ */
+static inline void
+i915_active_request_init(struct i915_active_request *active,
+			 struct i915_request *rq,
+			 i915_active_retire_fn retire)
+{
+	RCU_INIT_POINTER(active->request, rq);
+	INIT_LIST_HEAD(&active->link);
+	active->retire = retire ?: i915_active_retire_noop;
+}
+
+#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
+
+/**
+ * __i915_active_request_set - updates the tracker to watch the current request
+ * @active - the active tracker
+ * @request - the request to watch
+ *
+ * __i915_active_request_set() watches the given @request for completion. Whilst
+ * that @request is busy, the @active reports busy. When that @request is
+ * retired, the @active tracker is updated to report idle.
+ */
+static inline void
+__i915_active_request_set(struct i915_active_request *active,
+			  struct i915_request *request)
+{
+	list_move(&active->link, &request->active_list);
+	rcu_assign_pointer(active->request, request);
+}
+
+int __must_check
+i915_active_request_set(struct i915_active_request *active,
+			struct i915_request *rq);
+
+/**
+ * i915_active_request_set_retire_fn - updates the retirement callback
+ * @active - the active tracker
+ * @fn - the routine called when the request is retired
+ * @mutex - struct_mutex used to guard retirements
+ *
+ * i915_active_request_set_retire_fn() updates the function pointer that
+ * is called when the final request associated with the @active tracker
+ * is retired.
+ */
+static inline void
+i915_active_request_set_retire_fn(struct i915_active_request *active,
+				  i915_active_retire_fn fn,
+				  struct mutex *mutex)
+{
+	lockdep_assert_held(mutex);
+	active->retire = fn ?: i915_active_retire_noop;
+}
+
+static inline struct i915_request *
+__i915_active_request_peek(const struct i915_active_request *active)
+{
+	/*
+	 * Inside the error capture (running with the driver in an unknown
+	 * state), we want to bend the rules slightly (a lot).
+	 *
+	 * Work is in progress to make it safer, in the meantime this keeps
+	 * the known issue from spamming the logs.
+	 */
+	return rcu_dereference_protected(active->request, 1);
+}
+
+/**
+ * i915_active_request_raw - return the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_raw() returns the current request being tracked, or NULL.
+ * It does not obtain a reference on the request for the caller, so the caller
+ * must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_raw(const struct i915_active_request *active,
+			struct mutex *mutex)
+{
+	return rcu_dereference_protected(active->request,
+					 lockdep_is_held(mutex));
+}
+
+/**
+ * i915_active_request_peek - report the active request being monitored
+ * @active - the active tracker
+ *
+ * i915_active_request_peek() returns the current request being tracked if
+ * still active, or NULL. It does not obtain a reference on the request
+ * for the caller, so the caller must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_peek(const struct i915_active_request *active,
+			 struct mutex *mutex)
+{
+	struct i915_request *request;
+
+	request = i915_active_request_raw(active, mutex);
+	if (!request || i915_request_completed(request))
+		return NULL;
+
+	return request;
+}
+
+/**
+ * i915_active_request_get - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_get() returns a reference to the active request, or NULL
+ * if the active tracker is idle. The caller must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_get(const struct i915_active_request *active,
+			struct mutex *mutex)
+{
+	return i915_request_get(i915_active_request_peek(active, mutex));
+}
+
+/**
+ * __i915_active_request_get_rcu - return a reference to the active request
+ * @active - the active tracker
+ *
+ * __i915_active_request_get() returns a reference to the active request,
+ * or NULL if the active tracker is idle. The caller must hold the RCU read
+ * lock, but the returned pointer is safe to use outside of RCU.
+ */
+static inline struct i915_request *
+__i915_active_request_get_rcu(const struct i915_active_request *active)
+{
+	/*
+	 * Performing a lockless retrieval of the active request is super
+	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
+	 * slab of request objects will not be freed whilst we hold the
+	 * RCU read lock. It does not guarantee that the request itself
+	 * will not be freed and then *reused*. Viz,
+	 *
+	 *      Thread A                 Thread B
+	 *
+	 *      rq = active.request
+	 *                               retire(rq) -> free(rq);
+	 *                               (rq is now first on the slab freelist)
+	 *                               active.request = NULL
+	 *
+	 *                               rq = new submission on a new object
+	 *      ref(rq)
+	 *
+	 * To prevent the request from being reused whilst the caller
+	 * uses it, we take a reference like normal. Whilst acquiring
+	 * the reference we check that it is not in a destroyed state
+	 * (refcnt == 0). That prevents the request being reallocated
+	 * whilst the caller holds on to it. To check that the request
+	 * was not reallocated as we acquired the reference we have to
+	 * check that our request remains the active request across
+	 * the lookup, in the same manner as a seqlock. The visibility
+	 * of the pointer versus the reference counting is controlled
+	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
+	 *
+	 * In the middle of all that, we inspect whether the request is
+	 * complete. Retiring is lazy so the request may be completed long
+	 * before the active tracker is updated. Querying whether the
+	 * request is complete is far cheaper (as it involves no locked
+	 * instructions setting cachelines to exclusive) than acquiring
+	 * the reference, so we do it first. The RCU read lock ensures the
+	 * pointer dereference is valid, but does not ensure that the
+	 * seqno nor HWS is the right one! However, if the request was
+	 * reallocated, that means the active tracker's request was complete.
+	 * If the new request is also complete, then both are and we can
+	 * just report the active tracker is idle. If the new request is
+	 * incomplete, then we acquire a reference on it and check that
+	 * it remained the active request.
+	 *
+	 * It is then imperative that we do not zero the request on
+	 * reallocation, so that we can chase the dangling pointers!
+	 * See i915_request_alloc().
+	 */
+	do {
+		struct i915_request *request;
+
+		request = rcu_dereference(active->request);
+		if (!request || i915_request_completed(request))
+			return NULL;
+
+		/*
+		 * An especially silly compiler could decide to recompute the
+		 * result of i915_request_completed, more specifically
+		 * re-emit the load for request->fence.seqno. A race would catch
+		 * a later seqno value, which could flip the result from true to
+		 * false. Which means part of the instructions below might not
+		 * be executed, while later on instructions are executed. Due to
+		 * barriers within the refcounting the inconsistency can't reach
+		 * past the call to i915_request_get_rcu, but not executing
+		 * that while still executing i915_request_put() creates
+		 * havoc enough. Prevent this with a compiler barrier.
+		 */
+		barrier();
+
+		request = i915_request_get_rcu(request);
+
+		/*
+		 * What stops the following rcu_access_pointer() from occurring
+		 * before the above i915_request_get_rcu()? If we were
+		 * to read the value before pausing to get the reference to
+		 * the request, we may not notice a change in the active
+		 * tracker.
+		 *
+		 * The rcu_access_pointer() is a mere compiler barrier, which
+		 * means both the CPU and compiler are free to perform the
+		 * memory read without constraint. The compiler only has to
+		 * ensure that any operations after the rcu_access_pointer()
+		 * occur afterwards in program order. This means the read may
+		 * be performed earlier by an out-of-order CPU, or adventurous
+		 * compiler.
+		 *
+		 * The atomic operation at the heart of
+		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
+		 * atomic_inc_not_zero() which is only a full memory barrier
+		 * when successful. That is, if i915_request_get_rcu()
+		 * returns the request (and so with the reference counted
+		 * incremented) then the following read for rcu_access_pointer()
+		 * must occur after the atomic operation and so confirm
+		 * that this request is the one currently being tracked.
+		 *
+		 * The corresponding write barrier is part of
+		 * rcu_assign_pointer().
+		 */
+		if (!request || request == rcu_access_pointer(active->request))
+			return rcu_pointer_handoff(request);
+
+		i915_request_put(request);
+	} while (1);
+}
+
+/**
+ * i915_active_request_get_unlocked - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_get_unlocked() returns a reference to the active request,
+ * or NULL if the active tracker is idle. The reference is obtained under RCU,
+ * so no locking is required by the caller.
+ *
+ * The reference should be freed with i915_request_put().
+ */
+static inline struct i915_request *
+i915_active_request_get_unlocked(const struct i915_active_request *active)
+{
+	struct i915_request *request;
+
+	rcu_read_lock();
+	request = __i915_active_request_get_rcu(active);
+	rcu_read_unlock();
+
+	return request;
+}
+
+/**
+ * i915_active_request_isset - report whether the active tracker is assigned
+ * @active - the active tracker
+ *
+ * i915_active_request_isset() returns true if the active tracker is currently
+ * assigned to a request. Due to the lazy retiring, that request may be idle
+ * and this may report stale information.
+ */
+static inline bool
+i915_active_request_isset(const struct i915_active_request *active)
+{
+	return rcu_access_pointer(active->request);
+}
+
+/**
+ * i915_active_request_retire - waits until the request is retired
+ * @active - the active request on which to wait
+ *
+ * i915_active_request_retire() waits until the request is completed,
+ * and then ensures that at least the retirement handler for this
+ * @active tracker is called before returning. If the @active
+ * tracker is idle, the function returns immediately.
+ */
+static inline int __must_check
+i915_active_request_retire(struct i915_active_request *active,
+			   struct mutex *mutex)
+{
+	struct i915_request *request;
+	long ret;
+
+	request = i915_active_request_raw(active, mutex);
+	if (!request)
+		return 0;
+
+	ret = i915_request_wait(request,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				MAX_SCHEDULE_TIMEOUT);
+	if (ret < 0)
+		return ret;
+
+	list_del_init(&active->link);
+	RCU_INIT_POINTER(active->request, NULL);
+
+	active->retire(active, request);
+
+	return 0;
+}
+
 /*
  * GPU activity tracking
@@ -47,6 +394,8 @@ int i915_active_wait(struct i915_active *ref);
 
 int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref);
+int i915_request_await_active_request(struct i915_request *rq,
+				      struct i915_active_request *active);
 
 bool i915_active_acquire(struct i915_active *ref);
 
drivers/gpu/drm/i915/i915_active_types.h

@@ -8,16 +8,26 @@
 #define _I915_ACTIVE_TYPES_H_
 
 #include <linux/rbtree.h>
-
-#include "i915_request.h"
+#include <linux/rcupdate.h>
 
 struct drm_i915_private;
+struct i915_active_request;
+struct i915_request;
+
+typedef void (*i915_active_retire_fn)(struct i915_active_request *,
+				      struct i915_request *);
+
+struct i915_active_request {
+	struct i915_request __rcu *request;
+	struct list_head link;
+	i915_active_retire_fn retire;
+};
 
 struct i915_active {
 	struct drm_i915_private *i915;
 
 	struct rb_root tree;
-	struct i915_gem_active last;
+	struct i915_active_request last;
 	unsigned int count;
 
 	void (*retire)(struct i915_active *ref);
drivers/gpu/drm/i915/i915_debugfs.c

@@ -206,7 +206,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		if (vma->fence)
 			seq_printf(m, " , fence: %d%s",
 				   vma->fence->id,
-				   i915_gem_active_isset(&vma->last_fence) ? "*" : "");
+				   i915_active_request_isset(&vma->last_fence) ? "*" : "");
 		seq_puts(m, ")");
 	}
 	if (obj->stolen)
drivers/gpu/drm/i915/i915_gem.c

@@ -3018,7 +3018,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 
 	GEM_BUG_ON(i915->gt.active_requests);
 	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
+		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
 		GEM_BUG_ON(engine->last_retired_context !=
 			   to_intel_context(i915->kernel_context, engine));
 	}
@@ -3264,7 +3264,7 @@ wait_for_timelines(struct drm_i915_private *i915,
 	list_for_each_entry(tl, &gt->active_list, link) {
 		struct i915_request *rq;
 
-		rq = i915_gem_active_get_unlocked(&tl->last_request);
+		rq = i915_active_request_get_unlocked(&tl->last_request);
 		if (!rq)
 			continue;
 
@@ -4165,7 +4165,8 @@ out:
 }
 
 static void
-frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
+frontbuffer_retire(struct i915_active_request *active,
+		   struct i915_request *request)
 {
 	struct drm_i915_gem_object *obj =
 		container_of(active, typeof(*obj), frontbuffer_write);
 
@@ -4192,7 +4193,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	obj->resv = &obj->__builtin_resv;
 
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
+	i915_active_request_init(&obj->frontbuffer_write,
+				 NULL, frontbuffer_retire);
 
 	obj->mm.madv = I915_MADV_WILLNEED;
 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
drivers/gpu/drm/i915/i915_gem_context.c

@@ -322,7 +322,7 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	return desc;
 }
 
-static void intel_context_retire(struct i915_gem_active *active,
+static void intel_context_retire(struct i915_active_request *active,
 				 struct i915_request *rq)
 {
 	struct intel_context *ce =
@@ -344,7 +344,8 @@ intel_context_init(struct intel_context *ce,
 	/* Use the whole device by default */
 	ce->sseu = intel_device_default_sseu(ctx->i915);
 
-	init_request_active(&ce->active_tracker, intel_context_retire);
+	i915_active_request_init(&ce->active_tracker,
+				 NULL, intel_context_retire);
 }
 
 static struct i915_gem_context *
@@ -668,8 +669,8 @@ last_request_on_engine(struct i915_timeline *timeline,
 
 	GEM_BUG_ON(timeline == &engine->timeline);
 
-	rq = i915_gem_active_raw(&timeline->last_request,
-				 &engine->i915->drm.struct_mutex);
+	rq = i915_active_request_raw(&timeline->last_request,
+				     &engine->i915->drm.struct_mutex);
 	if (rq && rq->engine == engine) {
 		GEM_TRACE("last request for %s on engine %s: %llx:%llu\n",
 			  timeline->name, engine->name,
@@ -1015,8 +1016,8 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
 	}
 
 	/* Queue this switch after all other activity by this context. */
-	prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
-				   &i915->drm.struct_mutex);
+	prev = i915_active_request_raw(&ce->ring->timeline->last_request,
+				       &i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
 		ret = i915_request_await_dma_fence(rq, &prev->fence);
 		if (ret < 0)
@@ -1039,9 +1040,9 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
 	 * But we only need to take one pin on the account of it. Or in other
 	 * words transfer the pinned ce object to tracked active request.
 	 */
-	if (!i915_gem_active_isset(&ce->active_tracker))
+	if (!i915_active_request_isset(&ce->active_tracker))
 		__intel_context_pin(ce);
-	i915_gem_active_set(&ce->active_tracker, rq);
+	__i915_active_request_set(&ce->active_tracker, rq);
 
 out_add:
 	i915_request_add(rq);
drivers/gpu/drm/i915/i915_gem_context.h

@@ -187,7 +187,7 @@ struct i915_gem_context {
 		 * active_tracker: Active tracker for the external rq activity
 		 * on this intel_context object.
 		 */
-		struct i915_gem_active active_tracker;
+		struct i915_active_request active_tracker;
 
 		const struct intel_context_ops *ops;
 
drivers/gpu/drm/i915/i915_gem_fence_reg.c

@@ -223,7 +223,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 			 i915_gem_object_get_tiling(vma->obj)))
			return -EINVAL;
 
-		ret = i915_gem_active_retire(&vma->last_fence,
+		ret = i915_active_request_retire(&vma->last_fence,
					     &vma->obj->base.dev->struct_mutex);
		if (ret)
			return ret;
@@ -232,7 +232,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	if (fence->vma) {
 		struct i915_vma *old = fence->vma;
 
-		ret = i915_gem_active_retire(&old->last_fence,
+		ret = i915_active_request_retire(&old->last_fence,
					     &old->obj->base.dev->struct_mutex);
		if (ret)
			return ret;
 
drivers/gpu/drm/i915/i915_gem_gtt.c

@@ -1918,7 +1918,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 		return ERR_PTR(-ENOMEM);
 
 	i915_active_init(i915, &vma->active, NULL);
-	init_request_active(&vma->last_fence, NULL);
+	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = &ggtt->vm;
 	vma->ops = &pd_vma_ops;
drivers/gpu/drm/i915/i915_gem_object.h

@@ -175,7 +175,7 @@ struct drm_i915_gem_object {
 
 	atomic_t frontbuffer_bits;
 	unsigned int frontbuffer_ggtt_origin; /* write once */
-	struct i915_gem_active frontbuffer_write;
+	struct i915_active_request frontbuffer_write;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	unsigned int tiling_and_stride;
drivers/gpu/drm/i915/i915_gpu_error.c

@@ -1062,23 +1062,23 @@ i915_error_object_create(struct drm_i915_private *i915,
 }
 
 /* The error capture is special as it tries to run underneath the normal
- * locking rules - so we use the raw version of the i915_gem_active lookup.
+ * locking rules - so we use the raw version of the i915_active_request lookup.
  */
 static inline u32
-__active_get_seqno(struct i915_gem_active *active)
+__active_get_seqno(struct i915_active_request *active)
 {
 	struct i915_request *request;
 
-	request = __i915_gem_active_peek(active);
+	request = __i915_active_request_peek(active);
 	return request ? request->global_seqno : 0;
 }
 
 static inline int
-__active_get_engine_id(struct i915_gem_active *active)
+__active_get_engine_id(struct i915_active_request *active)
 {
 	struct i915_request *request;
 
-	request = __i915_gem_active_peek(active);
+	request = __i915_active_request_peek(active);
 	return request ? request->engine->id : -1;
 }
 
drivers/gpu/drm/i915/i915_request.c

@@ -29,6 +29,7 @@
 #include <linux/sched/signal.h>
 
 #include "i915_drv.h"
+#include "i915_active.h"
 #include "i915_reset.h"
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
@@ -125,12 +126,6 @@ static void unreserve_gt(struct drm_i915_private *i915)
 	i915_gem_park(i915);
 }
 
-void i915_gem_retire_noop(struct i915_gem_active *active,
-			  struct i915_request *request)
-{
-	/* Space left intentionally blank */
-}
-
 static void advance_ring(struct i915_request *request)
 {
 	struct intel_ring *ring = request->ring;
 
@@ -244,7 +239,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine,
 
 static void i915_request_retire(struct i915_request *request)
 {
-	struct i915_gem_active *active, *next;
+	struct i915_active_request *active, *next;
 
 	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
 		  request->engine->name,
@@ -278,10 +273,10 @@ static void i915_request_retire(struct i915_request *request)
 	 * we may spend an inordinate amount of time simply handling
	 * the retirement of requests and processing their callbacks.
	 * Of which, this loop itself is particularly hot due to the
-	 * cache misses when jumping around the list of i915_gem_active.
-	 * So we try to keep this loop as streamlined as possible and
-	 * also prefetch the next i915_gem_active to try and hide
-	 * the likely cache miss.
+	 * cache misses when jumping around the list of
+	 * i915_active_request. So we try to keep this loop as
+	 * streamlined as possible and also prefetch the next
+	 * i915_active_request to try and hide the likely cache miss.
	 */
	prefetchw(next);
 
@@ -526,17 +521,9 @@ out:
 	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
 }
 
-static int add_barrier(struct i915_request *rq, struct i915_gem_active *active)
-{
-	struct i915_request *barrier =
-		i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
-
-	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
-}
-
 static int add_timeline_barrier(struct i915_request *rq)
 {
-	return add_barrier(rq, &rq->timeline->barrier);
+	return i915_request_await_active_request(rq, &rq->timeline->barrier);
 }
 
 /**
@@ -595,7 +582,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is the request we are writing to here, may be in the process
-	 * of being read by __i915_gem_active_get_rcu(). As such,
+	 * of being read by __i915_active_request_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->global_seqno and increment the reference count.
@@ -937,8 +924,8 @@ void i915_request_add(struct i915_request *request)
 	 * see a more recent value in the hws than we are tracking.
	 */
 
-	prev = i915_gem_active_raw(&timeline->last_request,
-				   &request->i915->drm.struct_mutex);
+	prev = i915_active_request_raw(&timeline->last_request,
+				       &request->i915->drm.struct_mutex);
 	if (prev && !i915_request_completed(prev)) {
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
@@ -954,7 +941,7 @@ void i915_request_add(struct i915_request *request)
 	spin_unlock_irq(&timeline->lock);
 
 	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-	i915_gem_active_set(&timeline->last_request, request);
+	__i915_active_request_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	if (list_is_first(&request->ring_link, &ring->request_list)) {
drivers/gpu/drm/i915/i915_request.h

@@ -403,387 +403,4 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
 
 void i915_retire_requests(struct drm_i915_private *i915);
 
-/*
- * We treat requests as fences. This is not be to confused with our
- * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
- * We use the fences to synchronize access from the CPU with activity on the
- * GPU, for example, we should not rewrite an object's PTE whilst the GPU
- * is reading them. We also track fences at a higher level to provide
- * implicit synchronisation around GEM objects, e.g. set-domain will wait
- * for outstanding GPU rendering before marking the object ready for CPU
- * access, or a pageflip will wait until the GPU is complete before showing
- * the frame on the scanout.
- *
- * In order to use a fence, the object must track the fence it needs to
- * serialise with. For example, GEM objects want to track both read and
- * write access so that we can perform concurrent read operations between
- * the CPU and GPU engines, as well as waiting for all rendering to
- * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_gem_active to track the most recent (in
- * retirement order) request relevant for the desired mode of access.
- * The #i915_gem_active is updated with i915_gem_active_set() to track the
- * most recent fence request, typically this is done as part of
- * i915_vma_move_to_active().
- *
- * When the #i915_gem_active completes (is retired), it will
- * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_gem_active.request == NULL). The owner
- * can then perform any action, such as delayed freeing of an active
- * resource including itself.
- */
-struct i915_gem_active;
-
-typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
-				   struct i915_request *);
-
-struct i915_gem_active {
-	struct i915_request __rcu *request;
-	struct list_head link;
-	i915_gem_retire_fn retire;
-};
-
-void i915_gem_retire_noop(struct i915_gem_active *,
-			  struct i915_request *request);
-
-/**
- * init_request_active - prepares the activity tracker for use
- * @active - the active tracker
- * @func - a callback when then the tracker is retired (becomes idle),
- *         can be NULL
- *
- * init_request_active() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
- * after completion, the optional callback @func is invoked.
- */
-static inline void
-init_request_active(struct i915_gem_active *active,
-		    i915_gem_retire_fn retire)
-{
-	RCU_INIT_POINTER(active->request, NULL);
-	INIT_LIST_HEAD(&active->link);
-	active->retire = retire ?: i915_gem_retire_noop;
-}
-
-/**
- * i915_gem_active_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * i915_gem_active_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-i915_gem_active_set(struct i915_gem_active *active,
-		    struct i915_request *request)
-{
-	list_move(&active->link, &request->active_list);
-	rcu_assign_pointer(active->request, request);
-}
-
-/**
- * i915_gem_active_set_retire_fn - updates the retirement callback
- * @active - the active tracker
- * @fn - the routine called when the request is retired
- * @mutex - struct_mutex used to guard retirements
- *
- * i915_gem_active_set_retire_fn() updates the function pointer that
- * is called when the final request associated with the @active tracker
- * is retired.
- */
-static inline void
-i915_gem_active_set_retire_fn(struct i915_gem_active *active,
-			      i915_gem_retire_fn fn,
-			      struct mutex *mutex)
-{
-	lockdep_assert_held(mutex);
-	active->retire = fn ?: i915_gem_retire_noop;
-}
-
-static inline struct i915_request *
-__i915_gem_active_peek(const struct i915_gem_active *active)
-{
-	/*
-	 * Inside the error capture (running with the driver in an unknown
-	 * state), we want to bend the rules slightly (a lot).
-	 *
-	 * Work is in progress to make it safer, in the meantime this keeps
-	 * the known issue from spamming the logs.
-	 */
-	return rcu_dereference_protected(active->request, 1);
-}
-
-/**
- * i915_gem_active_raw - return the active request
- * @active - the active tracker
- *
- * i915_gem_active_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
-{
-	return rcu_dereference_protected(active->request,
-					 lockdep_is_held(mutex));
-}
-
-/**
- * i915_gem_active_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_gem_active_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
-{
-	struct i915_request *request;
-
-	request = i915_gem_active_raw(active, mutex);
-	if (!request || i915_request_completed(request))
-		return NULL;
-
-	return request;
-}
-
-/**
- * i915_gem_active_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
-{
-	return i915_request_get(i915_gem_active_peek(active, mutex));
-}
-
-/**
- * __i915_gem_active_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_gem_active_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold the RCU read lock, but
- * the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_gem_active_get_rcu(const struct i915_gem_active *active)
-{
-	/*
-	 * Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-	 * slab of request objects will not be freed whilst we hold the
-	 * RCU read lock. It does not guarantee that the request itself
-	 * will not be freed and then *reused*. Viz,
-	 *
-	 *      Thread A                 Thread B
-	 *
-	 *      rq = active.request
-	 *                               retire(rq) -> free(rq);
-	 *                               (rq is now first on the slab freelist)
-	 *                               active.request = NULL
-	 *
-	 *                               rq = new submission on a new object
-	 *      ref(rq)
-	 *
-	 * To prevent the request from being reused whilst the caller
-	 * uses it, we take a reference like normal. Whilst acquiring
-	 * the reference we check that it is not in a destroyed state
-	 * (refcnt == 0). That prevents the request being reallocated
-	 * whilst the caller holds on to it. To check that the request
-	 * was not reallocated as we acquired the reference we have to
-	 * check that our request remains the active request across
-	 * the lookup, in the same manner as a seqlock. The visibility
-	 * of the pointer versus the reference counting is controlled
-	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-	 *
-	 * In the middle of all that, we inspect whether the request is
-	 * complete. Retiring is lazy so the request may be completed long
-	 * before the active tracker is updated. Querying whether the
-	 * request is complete is far cheaper (as it involves no locked
-	 * instructions setting cachelines to exclusive) than acquiring
-	 * the reference, so we do it first. The RCU read lock ensures the
-	 * pointer dereference is valid, but does not ensure that the
-	 * seqno nor HWS is the right one! However, if the request was
-	 * reallocated, that means the active tracker's request was complete.
-	 * If the new request is also complete, then both are and we can
-	 * just report the active tracker is idle. If the new request is
-	 * incomplete, then we acquire a reference on it and check that
-	 * it remained the active request.
-	 *
-	 * It is then imperative that we do not zero the request on
-	 * reallocation, so that we can chase the dangling pointers!
-	 * See i915_request_alloc().
-	 */
-	do {
-		struct i915_request *request;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_request_completed(request))
-			return NULL;
-
-		/*
-		 * An especially silly compiler could decide to recompute the
-		 * result of i915_request_completed, more specifically
-		 * re-emit the load for request->fence.seqno. A race would catch
-		 * a later seqno value, which could flip the result from true to
-		 * false. Which means part of the instructions below might not
-		 * be executed, while later on instructions are executed. Due to
-		 * barriers within the refcounting the inconsistency can't reach
-		 * past the call to i915_request_get_rcu, but not executing
-		 * that while still executing i915_request_put() creates
-		 * havoc enough. Prevent this with a compiler barrier.
-		 */
-		barrier();
-
-		request = i915_request_get_rcu(request);
-
-		/*
-		 * What stops the following rcu_access_pointer() from occurring
-		 * before the above i915_request_get_rcu()? If we were
-		 * to read the value before pausing to get the reference to
-		 * the request, we may not notice a change in the active
-		 * tracker.
-		 *
-		 * The rcu_access_pointer() is a mere compiler barrier, which
-		 * means both the CPU and compiler are free to perform the
-		 * memory read without constraint. The compiler only has to
-		 * ensure that any operations after the rcu_access_pointer()
-		 * occur afterwards in program order. This means the read may
-		 * be performed earlier by an out-of-order CPU, or adventurous
-		 * compiler.
-		 *
-		 * The atomic operation at the heart of
-		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-		 * atomic_inc_not_zero() which is only a full memory barrier
-		 * when successful. That is, if i915_request_get_rcu()
-		 * returns the request (and so with the reference counted
-		 * incremented) then the following read for rcu_access_pointer()
-		 * must occur after the atomic operation and so confirm
-		 * that this request is the one currently being tracked.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		if (!request || request == rcu_access_pointer(active->request))
-			return rcu_pointer_handoff(request);
-
-		i915_request_put(request);
-	} while (1);
-}
-
-/**
- * i915_gem_active_get_unlocked - return a reference to the active request
- * @active - the active tracker
- *
- * i915_gem_active_get_unlocked() returns a reference to the active request,
- * or NULL if the active tracker is idle. The reference is obtained under RCU,
- * so no locking is required by the caller.
- *
- * The reference should be freed with i915_request_put().
- */
-static inline struct i915_request *
-i915_gem_active_get_unlocked(const struct i915_gem_active *active)
-{
-	struct i915_request *request;
-
-	rcu_read_lock();
-	request = __i915_gem_active_get_rcu(active);
-	rcu_read_unlock();
-
-	return request;
-}
-
-/**
- * i915_gem_active_isset - report whether the active tracker is assigned
- * @active - the active tracker
- *
- * i915_gem_active_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
- * and this may report stale information.
- */
-static inline bool
-i915_gem_active_isset(const struct i915_gem_active *active)
-{
-	return rcu_access_pointer(active->request);
-}
-
-/**
- * i915_gem_active_wait - waits until the request is completed
- * @active - the active request on which to wait
- * @flags - how to wait
- * @timeout - how long to wait at most
- * @rps - userspace client to charge for a waitboost
- *
- * i915_gem_active_wait() waits until the request is completed before
- * returning, without requiring any locks to be held. Note that it does not
- * retire any requests before returning.
- *
- * This function relies on RCU in order to acquire the reference to the active
- * request without holding any locks. See __i915_gem_active_get_rcu() for the
- * glory details on how that is managed. Once the reference is acquired, we
- * can then wait upon the request, and afterwards release our reference,
- * free of any locking.
- *
- * This function wraps i915_request_wait(), see it for the full details on
- * the arguments.
- *
- * Returns 0 if successful, or a negative error code.
- */
-static inline int
-i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
-{
-	struct i915_request *request;
-	long ret = 0;
-
-	request = i915_gem_active_get_unlocked(active);
-	if (request) {
-		ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT);
-		i915_request_put(request);
-	}
-
-	return ret < 0 ? ret : 0;
-}
-
-/**
- * i915_gem_active_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_gem_active_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_gem_active_retire(struct i915_gem_active *active,
-		       struct mutex *mutex)
-{
-	struct i915_request *request;
-	long ret;
-
-	request = i915_gem_active_raw(active, mutex);
-	if (!request)
-		return 0;
-
-	ret = i915_request_wait(request,
-				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				MAX_SCHEDULE_TIMEOUT);
-	if (ret < 0)
-		return ret;
-
-	list_del_init(&active->link);
-	RCU_INIT_POINTER(active->request, NULL);
-
-	active->retire(active, request);
-
-	return 0;
-}
-
-#define for_each_active(mask, idx) \
-	for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
-
 #endif /* I915_REQUEST_H */
drivers/gpu/drm/i915/i915_reset.c

@@ -862,7 +862,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		struct i915_request *rq;
 		long timeout;
 
-		rq = i915_gem_active_get_unlocked(&tl->last_request);
+		rq = i915_active_request_get_unlocked(&tl->last_request);
 		if (!rq)
 			continue;
 
drivers/gpu/drm/i915/i915_timeline.c

@@ -163,8 +163,8 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	spin_lock_init(&timeline->lock);
 
-	init_request_active(&timeline->barrier, NULL);
-	init_request_active(&timeline->last_request, NULL);
+	INIT_ACTIVE_REQUEST(&timeline->barrier);
+	INIT_ACTIVE_REQUEST(&timeline->last_request);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -236,7 +236,7 @@ void i915_timeline_fini(struct i915_timeline *timeline)
 {
 	GEM_BUG_ON(timeline->pin_count);
 	GEM_BUG_ON(!list_empty(&timeline->requests));
-	GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier));
+	GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
 
 	i915_syncmap_free(&timeline->sync);
 	hwsp_free(timeline);
@@ -311,25 +311,6 @@ void i915_timeline_unpin(struct i915_timeline *tl)
 	__i915_vma_unpin(tl->hwsp_ggtt);
 }
 
-int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
-{
-	struct i915_request *old;
-	int err;
-
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
-
-	/* Must maintain ordering wrt existing barriers */
-	old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex);
-	if (old) {
-		err = i915_request_await_dma_fence(rq, &old->fence);
-		if (err)
-			return err;
-	}
-
-	i915_gem_active_set(&tl->barrier, rq);
-	return 0;
-}
-
 void __i915_timeline_free(struct kref *kref)
 {
 	struct i915_timeline *timeline =
drivers/gpu/drm/i915/i915_timeline.h

@@ -28,6 +28,7 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 
+#include "i915_active.h"
 #include "i915_request.h"
 #include "i915_syncmap.h"
 #include "i915_utils.h"
@@ -58,10 +59,10 @@ struct i915_timeline {
 
 	/* Contains an RCU guarded pointer to the last request. No reference is
	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_gem_active_get_request_rcu(), or hold the
+	 * the request using i915_active_request_get_request_rcu(), or hold the
	 * struct_mutex.
	 */
-	struct i915_gem_active last_request;
+	struct i915_active_request last_request;
 
 	/**
	 * We track the most recent seqno that we wait on in every context so
@@ -82,7 +83,7 @@ struct i915_timeline {
 	 * subsequent submissions to this timeline be executed only after the
	 * barrier has been completed.
	 */
-	struct i915_gem_active barrier;
+	struct i915_active_request barrier;
 
 	struct list_head link;
 	const char *name;
@@ -174,7 +175,10 @@ void i915_timelines_fini(struct drm_i915_private *i915);
 * submissions on @timeline. Subsequent requests will not be submitted to GPU
 * until the barrier has been completed.
 */
-int i915_timeline_set_barrier(struct i915_timeline *timeline,
-			      struct i915_request *rq);
+static inline int
+i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
+{
+	return i915_active_request_set(&tl->barrier, rq);
+}
 
 #endif
drivers/gpu/drm/i915/i915_vma.c

@@ -120,7 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-ENOMEM);
 
 	i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
-	init_request_active(&vma->last_fence, NULL);
+	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -808,7 +808,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
+	GEM_BUG_ON(i915_active_request_isset(&vma->last_fence));
 
 	mutex_lock(&vma->vm->mutex);
 	list_del(&vma->vm_link);
@@ -942,14 +942,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 		obj->write_domain = I915_GEM_DOMAIN_RENDER;
 
 		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
-			i915_gem_active_set(&obj->frontbuffer_write, rq);
+			__i915_active_request_set(&obj->frontbuffer_write, rq);
 
 		obj->read_domains = 0;
 	}
 	obj->read_domains |= I915_GEM_GPU_DOMAINS;
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		i915_gem_active_set(&vma->last_fence, rq);
+		__i915_active_request_set(&vma->last_fence, rq);
 
 	export_fence(vma, rq, flags);
 	return 0;
@@ -986,8 +986,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	if (ret)
 		goto unpin;
 
-	ret = i915_gem_active_retire(&vma->last_fence,
-				     &vma->vm->i915->drm.struct_mutex);
+	ret = i915_active_request_retire(&vma->last_fence,
+					 &vma->vm->i915->drm.struct_mutex);
 unpin:
 	__i915_vma_unpin(vma);
 	if (ret)
drivers/gpu/drm/i915/i915_vma.h

@@ -110,7 +110,7 @@ struct i915_vma {
 #define I915_VMA_GGTT_WRITE	BIT(15)
 
 	struct i915_active active;
-	struct i915_gem_active last_fence;
+	struct i915_active_request last_fence;
 
 	/**
	 * Support different GGTT views into the same object.
drivers/gpu/drm/i915/intel_engine_cs.c

@@ -1086,7 +1086,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
 	 * the last request that remains in the timeline. When idle, it is
	 * the last executed context as tracked by retirement.
	 */
-	rq = __i915_gem_active_peek(&engine->timeline.last_request);
+	rq = __i915_active_request_peek(&engine->timeline.last_request);
 	if (rq)
 		return rq->hw_context == kernel_context;
 	else
drivers/gpu/drm/i915/intel_overlay.c

@@ -184,7 +184,7 @@ struct intel_overlay {
 	struct overlay_registers __iomem *regs;
 	u32 flip_addr;
 	/* flip handling */
-	struct i915_gem_active last_flip;
+	struct i915_active_request last_flip;
 };
 
 static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
@@ -212,23 +212,23 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
 
 static void intel_overlay_submit_request(struct intel_overlay *overlay,
 					 struct i915_request *rq,
-					 i915_gem_retire_fn retire)
+					 i915_active_retire_fn retire)
 {
-	GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip,
-					&overlay->i915->drm.struct_mutex));
-	i915_gem_active_set_retire_fn(&overlay->last_flip, retire,
-				      &overlay->i915->drm.struct_mutex);
-	i915_gem_active_set(&overlay->last_flip, rq);
+	GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip,
+					    &overlay->i915->drm.struct_mutex));
+	i915_active_request_set_retire_fn(&overlay->last_flip, retire,
+					  &overlay->i915->drm.struct_mutex);
+	__i915_active_request_set(&overlay->last_flip, rq);
 	i915_request_add(rq);
 }
 
 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 					 struct i915_request *rq,
-					 i915_gem_retire_fn retire)
+					 i915_active_retire_fn retire)
 {
 	intel_overlay_submit_request(overlay, rq, retire);
-	return i915_gem_active_retire(&overlay->last_flip,
-				      &overlay->i915->drm.struct_mutex);
+	return i915_active_request_retire(&overlay->last_flip,
+					  &overlay->i915->drm.struct_mutex);
 }
 
 static struct i915_request *alloc_request(struct intel_overlay *overlay)
@@ -349,8 +349,9 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay)
 	i915_vma_put(vma);
 }
 
-static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
-					       struct i915_request *rq)
+static void
+intel_overlay_release_old_vid_tail(struct i915_active_request *active,
+				   struct i915_request *rq)
 {
 	struct intel_overlay *overlay =
 		container_of(active, typeof(*overlay), last_flip);
@@ -358,7 +359,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
 	intel_overlay_release_old_vma(overlay);
 }
 
-static void intel_overlay_off_tail(struct i915_gem_active *active,
+static void intel_overlay_off_tail(struct i915_active_request *active,
 				   struct i915_request *rq)
 {
 	struct intel_overlay *overlay =
@@ -421,8 +422,8 @@ static int intel_overlay_off(struct intel_overlay *overlay)
  * We have to be careful not to repeat work forever and make forward progress. */
 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	return i915_gem_active_retire(&overlay->last_flip,
-				      &overlay->i915->drm.struct_mutex);
+	return i915_active_request_retire(&overlay->last_flip,
+					  &overlay->i915->drm.struct_mutex);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -1355,7 +1356,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	init_request_active(&overlay->last_flip, NULL);
+	INIT_ACTIVE_REQUEST(&overlay->last_flip);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
drivers/gpu/drm/i915/selftests/mock_timeline.c

@@ -15,8 +15,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 
 	spin_lock_init(&timeline->lock);
 
-	init_request_active(&timeline->barrier, NULL);
-	init_request_active(&timeline->last_request, NULL);
+	INIT_ACTIVE_REQUEST(&timeline->barrier);
+	INIT_ACTIVE_REQUEST(&timeline->last_request);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);