drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
If we only call process_csb() from the tasklet, though we lose the ability
to bypass ksoftirqd interrupt processing on direct submission paths, we can
push it out of the irq-off spinlock. The penalty is that we then allow
schedule_out to be called concurrently with schedule_in, requiring us to
handle the usage count (baked into the pointer itself) atomically.

As we do kick the tasklets (via local_bh_enable()) after our submission,
there is a possibility there to see if we can pull the local softirq
processing back from the ksoftirqd.

v2: Store the 'switch_priority_hint' on submission, so that we can safely
check during process_csb().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190816171608.11760-1-chris@chris-wilson.co.uk
Parent: 25ffd4b11d
Commit: df40306902
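The core trick in this patch is storing a small usage count in the low
(alignment) bits of the ce->inflight engine pointer and updating the whole
word with compare-and-swap, so that schedule_in and schedule_out no longer
need the engine lock to agree. Below is a minimal, self-contained userspace
sketch of that scheme, using plain C11 atomics rather than the kernel's
READ_ONCE/try_cmpxchg; struct engine and the helper names are illustrative,
not driver code:

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define COUNT_BITS 2
#define COUNT_MASK ((1ul << COUNT_BITS) - 1)

struct engine { int id; };              /* stand-in for intel_engine_cs */

/* Engine pointer with a usage count packed into its low bits; a non-NULL
 * pointer with count bits 0 means exactly one user. */
static _Atomic uintptr_t inflight;

static struct engine *unpack_engine(uintptr_t v)
{
        return (struct engine *)(v & ~COUNT_MASK);
}

/* Acquire: the first user installs the engine; later users of the same
 * engine only bump the packed count. */
static void sketch_schedule_in(struct engine *e)
{
        uintptr_t old = atomic_load(&inflight);
        uintptr_t cur;

        do {
                cur = old ? old + 1 : (uintptr_t)e;
        } while (!atomic_compare_exchange_weak(&inflight, &old, cur));
}

/* Release: decrement, or clear the pointer on the final drop. Only the
 * caller that performs the final drop gets the engine back, and it alone
 * does the teardown work. */
static struct engine *sketch_schedule_out(void)
{
        uintptr_t old = atomic_load(&inflight);
        uintptr_t cur;

        do {
                cur = (old & COUNT_MASK) ? old - 1 : 0;
        } while (!atomic_compare_exchange_weak(&inflight, &old, cur));

        return cur ? NULL : unpack_engine(old);
}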
drivers/gpu/drm/i915/gt/intel_context_types.h

@@ -42,8 +42,6 @@ struct intel_context {
         struct intel_engine_cs *inflight;
 #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
 #define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
-#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
-#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
 
         struct i915_address_space *vm;
         struct i915_gem_context *gem_context;
drivers/gpu/drm/i915/gt/intel_engine_cs.c

@@ -1459,7 +1459,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
                 for (port = execlists->pending; (rq = *port); port++) {
                         /* Exclude any contexts already counted in active */
-                        if (intel_context_inflight_count(rq->hw_context) == 1)
+                        if (!intel_context_inflight_count(rq->hw_context))
                                 engine->stats.active++;
                 }
 
drivers/gpu/drm/i915/gt/intel_engine_types.h

@@ -204,6 +204,16 @@ struct intel_engine_execlists {
          */
         unsigned int port_mask;
 
+        /**
+         * @switch_priority_hint: Second context priority.
+         *
+         * We submit multiple contexts to the HW simultaneously and would
+         * like to occasionally switch between them to emulate timeslicing.
+         * To know when timeslicing is suitable, we track the priority of
+         * the context submitted second.
+         */
+        int switch_priority_hint;
+
         /**
          * @queue_priority_hint: Highest pending priority.
          *
drivers/gpu/drm/i915/gt/intel_lrc.c

@@ -547,27 +547,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
                                    status, rq);
 }
 
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+        struct intel_engine_cs * const engine = rq->engine;
+        struct intel_context * const ce = rq->hw_context;
+
+        intel_context_get(ce);
+
+        intel_gt_pm_get(engine->gt);
+        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+        intel_engine_context_in(engine);
+
+        return engine;
+}
+
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-        struct intel_context *ce = rq->hw_context;
-        int count;
+        struct intel_context * const ce = rq->hw_context;
+        struct intel_engine_cs *old;
 
+        GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
         trace_i915_request_in(rq, idx);
 
-        count = intel_context_inflight_count(ce);
-        if (!count) {
-                intel_context_get(ce);
-                ce->inflight = rq->engine;
-
-                intel_gt_pm_get(ce->inflight->gt);
-                execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-                intel_engine_context_in(ce->inflight);
-        }
+        old = READ_ONCE(ce->inflight);
+        do {
+                if (!old) {
+                        WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+                        break;
+                }
+        } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
 
-        intel_context_inflight_inc(ce);
         GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-
         return i915_request_get(rq);
 }
 
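Continuing the userspace sketch from above the diff (the names are
hypothetical helpers, not driver code): two back-to-back submissions of the
same context take the engine reference once, and only the final
schedule_out hands the engine back for teardown, which is the property that
lets the teardown side run without the irq-off lock.

static void sketch_demo(void)
{
        static struct engine e;         /* aligned, so low bits are free */

        sketch_schedule_in(&e);                 /* installs &e, one user */
        sketch_schedule_in(&e);                 /* same engine, count++ */

        assert(sketch_schedule_out() == NULL);  /* a user remains */
        assert(sketch_schedule_out() == &e);    /* last drop: tear down */
}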
@@ -581,19 +593,14 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 }
 
 static inline void
-execlists_schedule_out(struct i915_request *rq)
+__execlists_schedule_out(struct i915_request *rq,
+                         struct intel_engine_cs * const engine)
 {
-        struct intel_context *ce = rq->hw_context;
-
-        GEM_BUG_ON(!intel_context_inflight_count(ce));
-
-        trace_i915_request_out(rq);
-
-        intel_context_inflight_dec(ce);
-        if (!intel_context_inflight_count(ce)) {
-                intel_engine_context_out(ce->inflight);
-                execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-                intel_gt_pm_put(ce->inflight->gt);
+        struct intel_context * const ce = rq->hw_context;
+
+        intel_engine_context_out(engine);
+        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+        intel_gt_pm_put(engine->gt);
 
         /*
          * If this is part of a virtual engine, its next request may
@@ -604,13 +611,28 @@ execlists_schedule_out(struct i915_request *rq)
          * request before the tasklet runs and do not need to rebuild
          * each virtual tree and kick everyone again.
          */
-        ce->inflight = NULL;
-        if (rq->engine != ce->engine)
+        if (ce->engine != engine)
                 kick_siblings(rq, ce);
 
         intel_context_put(ce);
-        }
+}
+
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+        struct intel_context * const ce = rq->hw_context;
+        struct intel_engine_cs *cur, *old;
+
+        trace_i915_request_out(rq);
+        GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+        old = READ_ONCE(ce->inflight);
+        do
+                cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+        while (!try_cmpxchg(&ce->inflight, &old, cur));
+        if (!cur)
+                __execlists_schedule_out(rq, old);
 
         i915_request_put(rq);
 }
 
@@ -684,6 +706,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
         trace_ports(execlists, msg, execlists->pending);
 
+        if (!execlists->pending[0])
+                return false;
+
         if (execlists->pending[execlists_num_ports(execlists)])
                 return false;
 
@@ -941,12 +966,24 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
         return hint >= effective_prio(rq);
 }
 
-static bool
-enable_timeslice(struct intel_engine_cs *engine)
+static int
+switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
-        struct i915_request *last = last_active(&engine->execlists);
+        if (list_is_last(&rq->sched.link, &engine->active.requests))
+                return INT_MIN;
 
-        return last && need_timeslice(engine, last);
+        return rq_prio(list_next_entry(rq, sched.link));
+}
+
+static bool
+enable_timeslice(const struct intel_engine_execlists *execlists)
+{
+        const struct i915_request *rq = *execlists->active;
+
+        if (i915_request_completed(rq))
+                return false;
+
+        return execlists->switch_priority_hint >= effective_prio(rq);
 }
 
 static void record_preemption(struct intel_engine_execlists *execlists)
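The v2 note from the commit message is visible here: once process_csb() no
longer runs under the engine lock, enable_timeslice() cannot safely walk
the request lists, so the priority of the second submission is captured as
switch_priority_hint at dequeue time and merely compared later. A minimal
model of that store-then-check split, with toy types rather than driver
code:

#include <limits.h>
#include <stdbool.h>

struct toy_execlists {
        int switch_priority_hint;       /* written at submission */
        int active_prio;                /* effective_prio(*active) */
        bool active_completed;
};

/* Dequeue side: remember the next pending priority (INT_MIN if none). */
static void toy_record_switch_hint(struct toy_execlists *el, int next_prio,
                                   bool have_next)
{
        el->switch_priority_hint = have_next ? next_prio : INT_MIN;
}

/* CSB side: arm the timeslice timer only if the waiter is at least as
 * urgent as the running context and that context is still busy. */
static bool toy_enable_timeslice(const struct toy_execlists *el)
{
        if (el->active_completed)
                return false;

        return el->switch_priority_hint >= el->active_prio;
}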
@@ -1292,6 +1329,8 @@ done:
                 *port = execlists_schedule_in(last, port - execlists->pending);
                 memset(port + 1, 0, (last_port - port) * sizeof(*port));
                 execlists_submit_ports(engine);
+                execlists->switch_priority_hint =
+                        switch_prio(engine, *execlists->pending);
         } else {
                 ring_set_paused(engine, 0);
         }
@@ -1356,7 +1395,6 @@ static void process_csb(struct intel_engine_cs *engine)
         const u8 num_entries = execlists->csb_size;
         u8 head, tail;
 
-        lockdep_assert_held(&engine->active.lock);
         GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
         /*
@@ -1427,15 +1465,14 @@ static void process_csb(struct intel_engine_cs *engine)
                                execlists->pending,
                                execlists_num_ports(execlists) *
                                sizeof(*execlists->pending));
-                        execlists->pending[0] = NULL;
-
-                        trace_ports(execlists, "promoted", execlists->active);
 
-                        if (enable_timeslice(engine))
+                        if (enable_timeslice(execlists))
                                 mod_timer(&execlists->timer, jiffies + 1);
 
                         if (!inject_preempt_hang(execlists))
                                 ring_set_paused(engine, 0);
+
+                        WRITE_ONCE(execlists->pending[0], NULL);
                         break;
 
                 case CSB_COMPLETE: /* port0 completed, advanced to port1 */
@@ -1479,8 +1516,6 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
         lockdep_assert_held(&engine->active.lock);
-
-        process_csb(engine);
         if (!engine->execlists.pending[0])
                 execlists_dequeue(engine);
 }
@@ -1494,10 +1529,13 @@ static void execlists_submission_tasklet(unsigned long data)
         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
         unsigned long flags;
 
-        spin_lock_irqsave(&engine->active.lock, flags);
-        __execlists_submission_tasklet(engine);
-        spin_unlock_irqrestore(&engine->active.lock, flags);
+        process_csb(engine);
+        if (!READ_ONCE(engine->execlists.pending[0])) {
+                spin_lock_irqsave(&engine->active.lock, flags);
+                __execlists_submission_tasklet(engine);
+                spin_unlock_irqrestore(&engine->active.lock, flags);
+        }
 }
 
 static void execlists_submission_timer(struct timer_list *timer)
 {
drivers/gpu/drm/i915/i915_utils.h

@@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
         ((typeof(ptr))((unsigned long)(ptr) | __bits));         \
 })
 
-#define ptr_count_dec(p_ptr) do {                               \
-        typeof(p_ptr) __p = (p_ptr);                            \
-        unsigned long __v = (unsigned long)(*__p);              \
-        *__p = (typeof(*p_ptr))(--__v);                         \
-} while (0)
+#define ptr_dec(ptr) ({                                         \
+        unsigned long __v = (unsigned long)(ptr);               \
+        (typeof(ptr))(__v - 1);                                 \
+})
 
-#define ptr_count_inc(p_ptr) do {                               \
-        typeof(p_ptr) __p = (p_ptr);                            \
-        unsigned long __v = (unsigned long)(*__p);              \
-        *__p = (typeof(*p_ptr))(++__v);                         \
-} while (0)
+#define ptr_inc(ptr) ({                                         \
+        unsigned long __v = (unsigned long)(ptr);               \
+        (typeof(ptr))(__v + 1);                                 \
+})
 
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
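The reshaped macros are what make the cmpxchg loops above possible: the old
do { } while (0) forms mutated the pointer in place and so could not appear
as the desired-value operand of try_cmpxchg(), whereas the new forms are
pure expressions. A standalone illustration with plain C11 atomics (these
are userspace re-creations of the macros, not the kernel headers):

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

/* Expression forms, mirroring the new ptr_inc()/ptr_dec(): they return
 * the adjusted value instead of writing through a pointer. */
#define toy_ptr_dec(ptr)        ((void *)((uintptr_t)(ptr) - 1))
#define toy_ptr_inc(ptr)        ((void *)((uintptr_t)(ptr) + 1))
#define toy_unmask_bits(ptr, n) ((uintptr_t)(ptr) & ((1ul << (n)) - 1))

int main(void)
{
        static int engine;                      /* any aligned object */
        _Atomic(void *) inflight = &engine;     /* count bits 0: one user */
        void *old = atomic_load(&inflight);
        void *cur;

        /* Extra reference: bump the count packed in the low bits. */
        do
                cur = toy_ptr_inc(old);
        while (!atomic_compare_exchange_weak(&inflight, &old, cur));

        /* Drop it: decrement, or clear entirely on the last drop. */
        old = atomic_load(&inflight);
        do
                cur = toy_unmask_bits(old, 2) ? toy_ptr_dec(old) : NULL;
        while (!atomic_compare_exchange_weak(&inflight, &old, cur));

        assert(atomic_load(&inflight) == (void *)&engine); /* one user */
        return 0;
}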