drm/i915/guc: Implement GuC priority management

Implement a simple static mapping algorithm of the i915 priority levels
(int, -1k to 1k exposed to user) to the 4 GuC levels. Mapping is as
follows:

i915 level < 0                          -> GuC low level     (3)
i915 level == 0                         -> GuC normal level  (2)
0 < i915 level < I915_PRIORITY_DISPLAY  -> GuC high level    (1)
i915 level >= I915_PRIORITY_DISPLAY     -> GuC highest level (0)

We believe this mapping should cover the UMD use cases (3 distinct user
levels + 1 kernel level).

In addition to static mapping, a simple counter system is attached to
each context tracking the number of requests inflight on the context at
each level. This is needed as the GuC levels are per context while the
i915 levels are per request.

v2:
 (Daniele)
  - Add BUILD_BUG_ON to enforce ordering of priority levels
  - Add missing lockdep to guc_prio_fini
  - Check for return before setting context registered flag
  - Map DISPLAY priority or higher to highest guc prio
  - Update comment for guc_prio

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-33-matthew.brost@intel.com
This commit is contained in:
Matthew Brost 2021-07-26 17:23:47 -07:00 коммит произвёл John Harrison
Родитель 3a7b72665e
Коммит ee242ca704
10 изменённых файлов: 282 добавлений и 5 удалений

Просмотреть файл

@ -245,6 +245,9 @@ static void signal_irq_work(struct irq_work *work)
llist_entry(signal, typeof(*rq), signal_node);
struct list_head cb_list;
if (rq->engine->sched_engine->retire_inflight_request_prio)
rq->engine->sched_engine->retire_inflight_request_prio(rq);
spin_lock(&rq->lock);
list_replace(&rq->fence.cb_list, &cb_list);
__dma_fence_signal__timestamp(&rq->fence, timestamp);

Просмотреть файл

@ -18,8 +18,9 @@
#include "intel_engine_types.h"
#include "intel_sseu.h"
#define CONTEXT_REDZONE POISON_INUSE
#include "uc/intel_guc_fwif.h"
#define CONTEXT_REDZONE POISON_INUSE
DECLARE_EWMA(runtime, 3, 8);
struct i915_gem_context;
@ -191,6 +192,12 @@ struct intel_context {
/* GuC context blocked fence */
struct i915_sw_fence guc_blocked;
/*
* GuC priority management
*/
u8 guc_prio;
u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM];
};
#endif /* __INTEL_CONTEXT_TYPES__ */

Просмотреть файл

@ -11,6 +11,7 @@
#include "intel_engine.h"
#include "intel_engine_user.h"
#include "intel_gt.h"
#include "uc/intel_guc_submission.h"
struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
@ -115,6 +116,9 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
disabled |= (I915_SCHEDULER_CAP_ENABLED |
I915_SCHEDULER_CAP_PRIORITY);
if (intel_uc_uses_guc_submission(&i915->gt.uc))
enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP;
for (i = 0; i < ARRAY_SIZE(map); i++) {
if (engine->flags & BIT(map[i].engine))
enabled |= BIT(map[i].sched);

Просмотреть файл

@ -81,6 +81,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
*/
#define SCHED_STATE_NO_LOCK_ENABLED BIT(0)
#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1)
#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2)
static inline bool context_enabled(struct intel_context *ce)
{
return (atomic_read(&ce->guc_sched_state_no_lock) &
@ -116,6 +117,24 @@ static inline void clr_context_pending_enable(struct intel_context *ce)
&ce->guc_sched_state_no_lock);
}
/* Tell whether the REGISTERED bit is set in the context's lock-free state. */
static inline bool context_registered(struct intel_context *ce)
{
	const int state = atomic_read(&ce->guc_sched_state_no_lock);

	return (state & SCHED_STATE_NO_LOCK_REGISTERED) != 0;
}
/* Atomically set the REGISTERED bit in the context's lock-free state. */
static inline void set_context_registered(struct intel_context *ce)
{
	const int flag = SCHED_STATE_NO_LOCK_REGISTERED;

	atomic_or(flag, &ce->guc_sched_state_no_lock);
}
static inline void clr_context_registered(struct intel_context *ce)
{
atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
&ce->guc_sched_state_no_lock);
}
/*
* Below is a set of functions which control the GuC scheduling state which
* require a lock, aside from the special case where the functions are called
@ -1092,6 +1111,7 @@ static int steal_guc_id(struct intel_guc *guc)
list_del_init(&ce->guc_id_link);
guc_id = ce->guc_id;
clr_context_registered(ce);
set_context_guc_id_invalid(ce);
return guc_id;
} else {
@ -1201,10 +1221,15 @@ static int register_context(struct intel_context *ce, bool loop)
struct intel_guc *guc = ce_to_guc(ce);
u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
ce->guc_id * sizeof(struct guc_lrc_desc);
int ret;
trace_intel_context_register(ce);
return __guc_action_register_context(guc, ce->guc_id, offset, loop);
ret = __guc_action_register_context(guc, ce->guc_id, offset, loop);
if (likely(!ret))
set_context_registered(ce);
return ret;
}
static int __guc_action_deregister_context(struct intel_guc *guc,
@ -1260,6 +1285,8 @@ static void guc_context_policy_init(struct intel_engine_cs *engine,
desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
}
static inline u8 map_i915_prio_to_guc_prio(int prio);
static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
@ -1267,6 +1294,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
struct intel_guc *guc = &engine->gt->uc.guc;
u32 desc_idx = ce->guc_id;
struct guc_lrc_desc *desc;
const struct i915_gem_context *ctx;
int prio = I915_CONTEXT_DEFAULT_PRIORITY;
bool context_registered;
intel_wakeref_t wakeref;
int ret = 0;
@ -1282,6 +1311,12 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
context_registered = lrc_desc_registered(guc, desc_idx);
rcu_read_lock();
ctx = rcu_dereference(ce->gem_context);
if (ctx)
prio = ctx->sched.priority;
rcu_read_unlock();
reset_lrc_desc(guc, desc_idx);
set_lrc_desc_registered(guc, desc_idx, ce);
@ -1290,7 +1325,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
desc->engine_submit_mask = adjust_engine_mask(engine->class,
engine->mask);
desc->hw_context_desc = ce->lrc.lrca;
desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
ce->guc_prio = map_i915_prio_to_guc_prio(prio);
desc->priority = ce->guc_prio;
desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
guc_context_policy_init(engine, desc);
init_sched_state(ce);
@ -1693,11 +1729,17 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
GEM_BUG_ON(context_enabled(ce));
clr_context_registered(ce);
deregister_context(ce, ce->guc_id, true);
}
static void __guc_context_destroy(struct intel_context *ce)
{
GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||
ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
lrc_fini(ce);
intel_context_fini(ce);
@ -1791,15 +1833,124 @@ static int guc_context_alloc(struct intel_context *ce)
return lrc_alloc(ce, ce->engine);
}
/*
 * Move @ce to GuC priority level @prio.
 *
 * Sends INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY for the context's guc_id and
 * then caches the level in ce->guc_prio. Nothing is sent, and the cached
 * level is left untouched, when the context is already at @prio, when
 * submission is disabled, or when the context is not registered. The result
 * of the send is not inspected.
 */
static void guc_context_set_prio(struct intel_guc *guc,
				 struct intel_context *ce,
				 u8 prio)
{
	u32 h2g[] = {
		INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY,
		ce->guc_id,
		prio,
	};

	GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||
		   prio > GUC_CLIENT_PRIORITY_NORMAL);

	/* Already running at the requested level. */
	if (ce->guc_prio == prio)
		return;

	if (submission_disabled(guc))
		return;

	if (!context_registered(ce))
		return;

	guc_submission_send_busy_loop(guc, h2g, ARRAY_SIZE(h2g), 0, true);

	ce->guc_prio = prio;
	trace_intel_context_set_prio(ce);
}
/*
 * Statically map an i915 priority onto one of the four GuC priority levels:
 *
 *   prio <  I915_PRIORITY_NORMAL   -> GUC_CLIENT_PRIORITY_NORMAL
 *   prio == I915_PRIORITY_NORMAL   -> GUC_CLIENT_PRIORITY_KMD_NORMAL
 *   prio <  I915_PRIORITY_DISPLAY  -> GUC_CLIENT_PRIORITY_HIGH
 *   otherwise                      -> GUC_CLIENT_PRIORITY_KMD_HIGH
 */
static inline u8 map_i915_prio_to_guc_prio(int prio)
{
	if (prio < I915_PRIORITY_NORMAL)
		return GUC_CLIENT_PRIORITY_NORMAL;

	if (prio == I915_PRIORITY_NORMAL)
		return GUC_CLIENT_PRIORITY_KMD_NORMAL;

	if (prio < I915_PRIORITY_DISPLAY)
		return GUC_CLIENT_PRIORITY_HIGH;

	return GUC_CLIENT_PRIORITY_KMD_HIGH;
}
/*
 * Account one more inflight request at GuC level @guc_prio on @ce.
 * Caller must hold ce->guc_active.lock.
 */
static inline void add_context_inflight_prio(struct intel_context *ce,
					     u8 guc_prio)
{
	lockdep_assert_held(&ce->guc_active.lock);
	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));

	ce->guc_prio_count[guc_prio] += 1;

	/* A counter that reads zero right after the increment has wrapped. */
	GEM_WARN_ON(ce->guc_prio_count[guc_prio] == 0);
}
/*
 * Drop one inflight request from the GuC level @guc_prio counter on @ce.
 * Caller must hold ce->guc_active.lock.
 */
static inline void sub_context_inflight_prio(struct intel_context *ce,
					     u8 guc_prio)
{
	lockdep_assert_held(&ce->guc_active.lock);
	GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count));

	/* Decrementing a zero counter would wrap; warn before doing so. */
	GEM_WARN_ON(ce->guc_prio_count[guc_prio] == 0);

	ce->guc_prio_count[guc_prio] -= 1;
}
/*
 * Set the context's GuC priority to the highest level that still has
 * inflight requests. Levels are scanned from index 0 upwards (lower index is
 * higher priority); if every counter is zero the priority is left alone.
 * Caller must hold ce->guc_active.lock.
 */
static inline void update_context_prio(struct intel_context *ce)
{
	struct intel_guc *guc = &ce->engine->gt->uc.guc;
	int level;

	/* The scan below relies on KMD_HIGH being the first, lowest index. */
	BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0);
	BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL);

	lockdep_assert_held(&ce->guc_active.lock);

	for (level = 0; level < ARRAY_SIZE(ce->guc_prio_count); ++level) {
		if (!ce->guc_prio_count[level])
			continue;

		guc_context_set_prio(guc, ce, level);
		return;
	}
}
/*
 * Return true when @new_guc_prio outranks @old_guc_prio. GuC levels are
 * inverted: the numerically smaller value is the higher priority.
 */
static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
{
	return old_guc_prio > new_guc_prio;
}
static void add_to_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI);
spin_lock(&ce->guc_active.lock);
list_move_tail(&rq->sched.link, &ce->guc_active.requests);
if (rq->guc_prio == GUC_PRIO_INIT) {
rq->guc_prio = new_guc_prio;
add_context_inflight_prio(ce, rq->guc_prio);
} else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
sub_context_inflight_prio(ce, rq->guc_prio);
rq->guc_prio = new_guc_prio;
add_context_inflight_prio(ce, rq->guc_prio);
}
update_context_prio(ce);
spin_unlock(&ce->guc_active.lock);
}
/*
 * Stop tracking @rq in the inflight priority counters of @ce and mark the
 * request as GUC_PRIO_FINI. Requests that were never counted (GUC_PRIO_INIT)
 * or are already finalised (GUC_PRIO_FINI) only get the marker written.
 * Caller must hold ce->guc_active.lock.
 */
static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_active.lock);

	switch (rq->guc_prio) {
	case GUC_PRIO_INIT:
	case GUC_PRIO_FINI:
		/* Not present in any counter. */
		break;
	default:
		sub_context_inflight_prio(ce, rq->guc_prio);
		update_context_prio(ce);
		break;
	}

	rq->guc_prio = GUC_PRIO_FINI;
}
static void remove_from_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
@ -1812,6 +1963,8 @@ static void remove_from_context(struct i915_request *rq)
/* Prevent further __await_execution() registering a cb, then flush */
set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
guc_prio_fini(rq, ce);
spin_unlock_irq(&ce->guc_active.lock);
atomic_dec(&ce->guc_id_ref);
@ -2093,6 +2246,39 @@ static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
}
}
/*
 * Raise the GuC priority level at which an inflight request is accounted.
 *
 * @rq:   request whose i915 priority is about to change
 * @prio: the new i915 priority
 *
 * Maps @prio to a GuC level and, if that level outranks the one the request
 * is currently counted at (or the request is not counted yet), moves the
 * request's contribution to the new level's counter and re-evaluates the
 * context priority. Installed as sched_engine->bump_inflight_request_prio.
 */
static void guc_bump_inflight_request_prio(struct i915_request *rq,
int prio)
{
struct intel_context *ce = rq->context;
u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
/*
 * Short circuit function: nothing to do for below-normal priorities, for
 * requests already finalised, or for requests already counted at an equal
 * or higher GuC level. These checks run without ce->guc_active.lock held.
 */
if (prio < I915_PRIORITY_NORMAL ||
rq->guc_prio == GUC_PRIO_FINI ||
(rq->guc_prio != GUC_PRIO_INIT &&
!new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
return;
spin_lock(&ce->guc_active.lock);
/*
 * The test above ran unlocked, so GUC_PRIO_FINI is checked again now that
 * the lock is held.
 * NOTE(review): the "is the new level higher" comparison is not repeated
 * under the lock - confirm callers cannot race two bumps on one request.
 */
if (rq->guc_prio != GUC_PRIO_FINI) {
/* A request still at GUC_PRIO_INIT has no counter slot to release. */
if (rq->guc_prio != GUC_PRIO_INIT)
sub_context_inflight_prio(ce, rq->guc_prio);
rq->guc_prio = new_guc_prio;
add_context_inflight_prio(ce, rq->guc_prio);
update_context_prio(ce);
}
spin_unlock(&ce->guc_active.lock);
}
/*
 * Remove @rq from its context's inflight priority accounting, taking
 * ce->guc_active.lock around the update. Installed as
 * sched_engine->retire_inflight_request_prio.
 */
static void guc_retire_inflight_request_prio(struct i915_request *rq)
{
	struct intel_context *owner = rq->context;

	spin_lock(&owner->guc_active.lock);
	guc_prio_fini(rq, owner);
	spin_unlock(&owner->guc_active.lock);
}
static void sanitize_hwsp(struct intel_engine_cs *engine)
{
struct intel_timeline *tl;
@ -2317,6 +2503,10 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
guc->sched_engine->disabled = guc_sched_engine_disabled;
guc->sched_engine->private_data = guc;
guc->sched_engine->destroy = guc_sched_engine_destroy;
guc->sched_engine->bump_inflight_request_prio =
guc_bump_inflight_request_prio;
guc->sched_engine->retire_inflight_request_prio =
guc_retire_inflight_request_prio;
tasklet_setup(&guc->sched_engine->tasklet,
guc_submission_tasklet);
}
@ -2694,6 +2884,22 @@ void intel_guc_submission_print_info(struct intel_guc *guc,
drm_printf(p, "\n");
}
/*
 * Print the GuC priority state of @ce to @p: the level currently cached in
 * ce->guc_prio, followed by the number of inflight requests accounted at
 * each GuC level (lower index == higher priority).
 */
static inline void guc_log_context_priority(struct drm_printer *p,
					    struct intel_context *ce)
{
	int i;

	drm_printf(p, "\t\tPriority: %d\n", ce->guc_prio);
	drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
	for (i = GUC_CLIENT_PRIORITY_KMD_HIGH;
	     i < GUC_CLIENT_PRIORITY_NUM; ++i) {
		/* guc_prio_count[] holds u32 counters, so print them unsigned. */
		drm_printf(p, "\t\tNumber requests in priority band[%d]: %u\n",
			   i, ce->guc_prio_count[i]);
	}
	drm_printf(p, "\n");
}
void intel_guc_submission_print_context_info(struct intel_guc *guc,
struct drm_printer *p)
{
@ -2716,6 +2922,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n",
ce->guc_state.sched_state,
atomic_read(&ce->guc_sched_state_no_lock));
guc_log_context_priority(p, ce);
}
}

Просмотреть файл

@ -114,6 +114,9 @@ static void i915_fence_release(struct dma_fence *fence)
{
struct i915_request *rq = to_request(fence);
GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT &&
rq->guc_prio != GUC_PRIO_FINI);
/*
* The request is put onto a RCU freelist (i.e. the address
* is immediately reused), mark the fences as being freed now.
@ -924,6 +927,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
rq->guc_prio = GUC_PRIO_INIT;
/* We bump the ref for the fence chain */
i915_sw_fence_reinit(&i915_request_get(rq)->submit);
i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);

Просмотреть файл

@ -293,6 +293,15 @@ struct i915_request {
*/
struct list_head guc_fence_link;
/**
* Priority level while the request is inflight. Differs from i915
* scheduler priority. See comment above
* I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details.
*/
#define GUC_PRIO_INIT 0xff
#define GUC_PRIO_FINI 0xfe
u8 guc_prio;
I915_SELFTEST_DECLARE(struct {
struct list_head link;
unsigned long delay;

Просмотреть файл

@ -241,6 +241,9 @@ static void __i915_schedule(struct i915_sched_node *node,
/* Fifo and depth-first replacement ensure our deps execute before us */
sched_engine = lock_sched_engine(node, sched_engine, &cache);
list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
struct i915_request *from = container_of(dep->signaler,
struct i915_request,
sched);
INIT_LIST_HEAD(&dep->dfs_link);
node = dep->signaler;
@ -254,6 +257,10 @@ static void __i915_schedule(struct i915_sched_node *node,
GEM_BUG_ON(node_to_request(node)->engine->sched_engine !=
sched_engine);
/* Must be called before changing the nodes priority */
if (sched_engine->bump_inflight_request_prio)
sched_engine->bump_inflight_request_prio(from, prio);
WRITE_ONCE(node->attr.priority, prio);
/*

Просмотреть файл

@ -179,6 +179,18 @@ struct i915_sched_engine {
void (*kick_backend)(const struct i915_request *rq,
int prio);
/**
* @bump_inflight_request_prio: update priority of an inflight request
*/
void (*bump_inflight_request_prio)(struct i915_request *rq,
int prio);
/**
* @retire_inflight_request_prio: indicate request is retired to
* priority tracking
*/
void (*retire_inflight_request_prio)(struct i915_request *rq);
/**
* @schedule: adjust priority of request
*

Просмотреть файл

@ -904,6 +904,7 @@ DECLARE_EVENT_CLASS(intel_context,
__field(int, pin_count)
__field(u32, sched_state)
__field(u32, guc_sched_state_no_lock)
__field(u8, guc_prio)
),
TP_fast_assign(
@ -912,12 +913,19 @@ DECLARE_EVENT_CLASS(intel_context,
__entry->sched_state = ce->guc_state.sched_state;
__entry->guc_sched_state_no_lock =
atomic_read(&ce->guc_sched_state_no_lock);
__entry->guc_prio = ce->guc_prio;
),
TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x",
TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u",
__entry->guc_id, __entry->pin_count,
__entry->sched_state,
__entry->guc_sched_state_no_lock)
__entry->guc_sched_state_no_lock,
__entry->guc_prio)
);
/*
 * intel_context_set_prio: instance of the intel_context event class, taking
 * the context as its only argument. Called from guc_context_set_prio() after
 * ce->guc_prio has been updated.
 */
DEFINE_EVENT(intel_context, intel_context_set_prio,
TP_PROTO(struct intel_context *ce),
TP_ARGS(ce)
);
DEFINE_EVENT(intel_context, intel_context_reset,
@ -1017,6 +1025,11 @@ trace_i915_request_out(struct i915_request *rq)
{
}
/*
 * Empty stand-in for the intel_context_set_prio tracepoint so that callers
 * still compile; it does nothing with @ce.
 * NOTE(review): presumably built only when the real tracepoints are
 * compiled out - confirm against the enclosing #if.
 */
static inline void
trace_intel_context_set_prio(struct intel_context *ce)
{
}
static inline void
trace_intel_context_reset(struct intel_context *ce)
{

Просмотреть файл

@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait {
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
/*
* Indicates the 2k user priority levels are statically mapped into 3 buckets as
* follows:
*
* -1k to -1 Low priority
* 0 Normal priority
* 1 to 1k Highest priority
*/
#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
#define I915_PARAM_HUC_STATUS 42