blk-iocost: revamp debt handling
Debt handling had several issues. * How much inuse a debtor carries wasn't clearly defined. inuse would be driven down over time from not issuing IOs but it'd be better to clamp it to minimum immediately once in debt. * How much can be paid off was determined by hweight_inuse. As inuse was driven down, the payment amount would fall together regardless of the debtor's active weight. This means that the debtors were punished harshly. * ioc_rqos_merge() wasn't calling blkcg_schedule_throttle() after iocg_kick_delay(). This patch revamps debt handling so that * Debt handling owns inuse for iocgs in debt and keeps them at zero. * Payment amount is determined by hweight_active. This is more deterministic and safer than hweight_inuse but still far from ideal in that it doesn't factor in possible donations from other iocgs for debt payments. This likely needs further improvements in the future. * iocg_rqos_merge() now calls blkcg_schedule_throttle() as necessary. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Andy Newell <newella@fb.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Родитель
b0853ab4a2
Коммит
c421a3eb2e
|
@ -1206,13 +1206,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
|
||||||
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
|
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
|
||||||
u64 vtime = atomic64_read(&iocg->vtime);
|
u64 vtime = atomic64_read(&iocg->vtime);
|
||||||
u64 delta_ns, expires, oexpires;
|
u64 delta_ns, expires, oexpires;
|
||||||
u32 hw_inuse;
|
u32 hwa;
|
||||||
|
|
||||||
lockdep_assert_held(&iocg->waitq.lock);
|
lockdep_assert_held(&iocg->waitq.lock);
|
||||||
|
|
||||||
/* debt-adjust vtime */
|
/* debt-adjust vtime */
|
||||||
current_hweight(iocg, NULL, &hw_inuse);
|
current_hweight(iocg, &hwa, NULL);
|
||||||
vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
|
vtime += abs_cost_to_cost(iocg->abs_vdebt, hwa);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clear or maintain depending on the overage. Non-zero vdebt is what
|
* Clear or maintain depending on the overage. Non-zero vdebt is what
|
||||||
|
@ -1258,6 +1258,47 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
|
||||||
return HRTIMER_NORESTART;
|
return HRTIMER_NORESTART;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void iocg_incur_debt(struct ioc_gq *iocg, u64 abs_cost,
|
||||||
|
struct ioc_now *now)
|
||||||
|
{
|
||||||
|
struct iocg_pcpu_stat *gcs;
|
||||||
|
|
||||||
|
lockdep_assert_held(&iocg->ioc->lock);
|
||||||
|
lockdep_assert_held(&iocg->waitq.lock);
|
||||||
|
WARN_ON_ONCE(list_empty(&iocg->active_list));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Once in debt, debt handling owns inuse. @iocg stays at the minimum
|
||||||
|
* inuse donating all of it share to others until its debt is paid off.
|
||||||
|
*/
|
||||||
|
if (!iocg->abs_vdebt && abs_cost)
|
||||||
|
propagate_weights(iocg, iocg->active, 0, false, now);
|
||||||
|
|
||||||
|
iocg->abs_vdebt += abs_cost;
|
||||||
|
|
||||||
|
gcs = get_cpu_ptr(iocg->pcpu_stat);
|
||||||
|
local64_add(abs_cost, &gcs->abs_vusage);
|
||||||
|
put_cpu_ptr(gcs);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
|
||||||
|
struct ioc_now *now)
|
||||||
|
{
|
||||||
|
lockdep_assert_held(&iocg->ioc->lock);
|
||||||
|
lockdep_assert_held(&iocg->waitq.lock);
|
||||||
|
|
||||||
|
/* make sure that nobody messed with @iocg */
|
||||||
|
WARN_ON_ONCE(list_empty(&iocg->active_list));
|
||||||
|
WARN_ON_ONCE(iocg->inuse > 1);
|
||||||
|
|
||||||
|
iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt);
|
||||||
|
|
||||||
|
/* if debt is paid in full, restore inuse */
|
||||||
|
if (!iocg->abs_vdebt)
|
||||||
|
propagate_weights(iocg, iocg->active, iocg->last_inuse,
|
||||||
|
false, now);
|
||||||
|
}
|
||||||
|
|
||||||
static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
|
static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
|
||||||
int flags, void *key)
|
int flags, void *key)
|
||||||
{
|
{
|
||||||
|
@ -1296,26 +1337,25 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
|
||||||
struct iocg_wake_ctx ctx = { .iocg = iocg };
|
struct iocg_wake_ctx ctx = { .iocg = iocg };
|
||||||
u64 vshortage, expires, oexpires;
|
u64 vshortage, expires, oexpires;
|
||||||
s64 vbudget;
|
s64 vbudget;
|
||||||
u32 hw_inuse;
|
u32 hwa;
|
||||||
|
|
||||||
lockdep_assert_held(&iocg->waitq.lock);
|
lockdep_assert_held(&iocg->waitq.lock);
|
||||||
|
|
||||||
current_hweight(iocg, NULL, &hw_inuse);
|
current_hweight(iocg, &hwa, NULL);
|
||||||
vbudget = now->vnow - atomic64_read(&iocg->vtime);
|
vbudget = now->vnow - atomic64_read(&iocg->vtime);
|
||||||
|
|
||||||
/* pay off debt */
|
/* pay off debt */
|
||||||
if (pay_debt && iocg->abs_vdebt && vbudget > 0) {
|
if (pay_debt && iocg->abs_vdebt && vbudget > 0) {
|
||||||
u64 vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
|
u64 abs_vbudget = cost_to_abs_cost(vbudget, hwa);
|
||||||
u64 delta = min_t(u64, vbudget, vdebt);
|
u64 abs_vpay = min_t(u64, abs_vbudget, iocg->abs_vdebt);
|
||||||
u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
|
u64 vpay = abs_cost_to_cost(abs_vpay, hwa);
|
||||||
iocg->abs_vdebt);
|
|
||||||
|
|
||||||
lockdep_assert_held(&ioc->lock);
|
lockdep_assert_held(&ioc->lock);
|
||||||
|
|
||||||
atomic64_add(delta, &iocg->vtime);
|
atomic64_add(vpay, &iocg->vtime);
|
||||||
atomic64_add(delta, &iocg->done_vtime);
|
atomic64_add(vpay, &iocg->done_vtime);
|
||||||
iocg->abs_vdebt -= abs_delta;
|
iocg_pay_debt(iocg, abs_vpay, now);
|
||||||
vbudget -= vdebt;
|
vbudget -= vpay;
|
||||||
|
|
||||||
iocg_kick_delay(iocg, now);
|
iocg_kick_delay(iocg, now);
|
||||||
}
|
}
|
||||||
|
@ -1327,17 +1367,20 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
|
||||||
* not positive.
|
* not positive.
|
||||||
*/
|
*/
|
||||||
if (iocg->abs_vdebt) {
|
if (iocg->abs_vdebt) {
|
||||||
s64 vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
|
s64 vdebt = abs_cost_to_cost(iocg->abs_vdebt, hwa);
|
||||||
vbudget = min_t(s64, 0, vbudget - vdebt);
|
vbudget = min_t(s64, 0, vbudget - vdebt);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wake up the ones which are due and see how much vtime we'll need
|
* Wake up the ones which are due and see how much vtime we'll need for
|
||||||
* for the next one.
|
* the next one. As paying off debt restores hw_inuse, it must be read
|
||||||
|
* after the above debt payment.
|
||||||
*/
|
*/
|
||||||
ctx.hw_inuse = hw_inuse;
|
|
||||||
ctx.vbudget = vbudget;
|
ctx.vbudget = vbudget;
|
||||||
|
current_hweight(iocg, NULL, &ctx.hw_inuse);
|
||||||
|
|
||||||
__wake_up_locked_key(&iocg->waitq, TASK_NORMAL, &ctx);
|
__wake_up_locked_key(&iocg->waitq, TASK_NORMAL, &ctx);
|
||||||
|
|
||||||
if (!waitqueue_active(&iocg->waitq))
|
if (!waitqueue_active(&iocg->waitq))
|
||||||
return;
|
return;
|
||||||
if (WARN_ON_ONCE(ctx.vbudget >= 0))
|
if (WARN_ON_ONCE(ctx.vbudget >= 0))
|
||||||
|
@ -1525,6 +1568,10 @@ static u32 hweight_after_donation(struct ioc_gq *iocg, u32 hwm, u32 usage,
|
||||||
u64 vtime = atomic64_read(&iocg->vtime);
|
u64 vtime = atomic64_read(&iocg->vtime);
|
||||||
s64 excess, delta, target, new_hwi;
|
s64 excess, delta, target, new_hwi;
|
||||||
|
|
||||||
|
/* debt handling owns inuse for debtors */
|
||||||
|
if (iocg->abs_vdebt)
|
||||||
|
return 1;
|
||||||
|
|
||||||
/* see whether minimum margin requirement is met */
|
/* see whether minimum margin requirement is met */
|
||||||
if (waitqueue_active(&iocg->waitq) ||
|
if (waitqueue_active(&iocg->waitq) ||
|
||||||
time_after64(vtime, now->vnow - ioc->margins.min))
|
time_after64(vtime, now->vnow - ioc->margins.min))
|
||||||
|
@ -1798,6 +1845,18 @@ static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
|
||||||
struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];
|
struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];
|
||||||
u32 inuse;
|
u32 inuse;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In-debt iocgs participated in the donation calculation with
|
||||||
|
* the minimum target hweight_inuse. Configuring inuse
|
||||||
|
* accordingly would work fine but debt handling expects
|
||||||
|
* @iocg->inuse stay at the minimum and we don't wanna
|
||||||
|
* interfere.
|
||||||
|
*/
|
||||||
|
if (iocg->abs_vdebt) {
|
||||||
|
WARN_ON_ONCE(iocg->inuse > 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* w' = s' * b' / b'_p, note that b' == b'_t for donating leaves */
|
/* w' = s' * b' / b'_p, note that b' == b'_t for donating leaves */
|
||||||
inuse = DIV64_U64_ROUND_UP(
|
inuse = DIV64_U64_ROUND_UP(
|
||||||
parent->child_adjusted_sum * iocg->hweight_after_donation,
|
parent->child_adjusted_sum * iocg->hweight_after_donation,
|
||||||
|
@ -2081,6 +2140,10 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
|
||||||
cost = abs_cost_to_cost(abs_cost, hwi);
|
cost = abs_cost_to_cost(abs_cost, hwi);
|
||||||
margin = now->vnow - vtime - cost;
|
margin = now->vnow - vtime - cost;
|
||||||
|
|
||||||
|
/* debt handling owns inuse for debtors */
|
||||||
|
if (iocg->abs_vdebt)
|
||||||
|
return cost;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We only increase inuse during period and do so iff the margin has
|
* We only increase inuse during period and do so iff the margin has
|
||||||
* deteriorated since the previous adjustment.
|
* deteriorated since the previous adjustment.
|
||||||
|
@ -2092,7 +2155,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
|
||||||
spin_lock_irq(&ioc->lock);
|
spin_lock_irq(&ioc->lock);
|
||||||
|
|
||||||
/* we own inuse only when @iocg is in the normal active state */
|
/* we own inuse only when @iocg is in the normal active state */
|
||||||
if (list_empty(&iocg->active_list)) {
|
if (iocg->abs_vdebt || list_empty(&iocg->active_list)) {
|
||||||
spin_unlock_irq(&ioc->lock);
|
spin_unlock_irq(&ioc->lock);
|
||||||
return cost;
|
return cost;
|
||||||
}
|
}
|
||||||
|
@ -2266,7 +2329,7 @@ retry_lock:
|
||||||
* penalizing the cgroup and its descendants.
|
* penalizing the cgroup and its descendants.
|
||||||
*/
|
*/
|
||||||
if (use_debt) {
|
if (use_debt) {
|
||||||
iocg->abs_vdebt += abs_cost;
|
iocg_incur_debt(iocg, abs_cost, &now);
|
||||||
if (iocg_kick_delay(iocg, &now))
|
if (iocg_kick_delay(iocg, &now))
|
||||||
blkcg_schedule_throttle(rqos->q,
|
blkcg_schedule_throttle(rqos->q,
|
||||||
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
|
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
|
||||||
|
@ -2275,7 +2338,7 @@ retry_lock:
|
||||||
}
|
}
|
||||||
|
|
||||||
/* guarantee that iocgs w/ waiters have maximum inuse */
|
/* guarantee that iocgs w/ waiters have maximum inuse */
|
||||||
if (iocg->inuse != iocg->active) {
|
if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
|
||||||
if (!ioc_locked) {
|
if (!ioc_locked) {
|
||||||
iocg_unlock(iocg, false, &flags);
|
iocg_unlock(iocg, false, &flags);
|
||||||
ioc_locked = true;
|
ioc_locked = true;
|
||||||
|
@ -2363,14 +2426,20 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
|
||||||
* be for the vast majority of cases. See debt handling in
|
* be for the vast majority of cases. See debt handling in
|
||||||
* ioc_rqos_throttle() for details.
|
* ioc_rqos_throttle() for details.
|
||||||
*/
|
*/
|
||||||
spin_lock_irqsave(&iocg->waitq.lock, flags);
|
spin_lock_irqsave(&ioc->lock, flags);
|
||||||
|
spin_lock(&iocg->waitq.lock);
|
||||||
|
|
||||||
if (likely(!list_empty(&iocg->active_list))) {
|
if (likely(!list_empty(&iocg->active_list))) {
|
||||||
iocg->abs_vdebt += abs_cost;
|
iocg_incur_debt(iocg, abs_cost, &now);
|
||||||
iocg_kick_delay(iocg, &now);
|
if (iocg_kick_delay(iocg, &now))
|
||||||
|
blkcg_schedule_throttle(rqos->q,
|
||||||
|
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
|
||||||
} else {
|
} else {
|
||||||
iocg_commit_bio(iocg, bio, abs_cost, cost);
|
iocg_commit_bio(iocg, bio, abs_cost, cost);
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
|
|
||||||
|
spin_unlock(&iocg->waitq.lock);
|
||||||
|
spin_unlock_irqrestore(&ioc->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
|
static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче