for-5.19/block-2022-06-02
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmKZmfQQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpsLlEACPbK/ms8dMDwKjfEF/RMoc7uL/j6oC0cpf 0D2sfMka8D41QdrUfMiUismXZ61dyKdsiX/U/Q0gcjIomnlco8ZeLcLa6DlafjwY DtvO2aCb+eBAkII5sX2WM4ANNgFTy08Y4wBmgEy5En5u4nPlIGZ8DsulQQodqygx 1lJh31OXQKw+2kIyUdAeC0GMiD9nddYDsH0CTFDSZsAijCcOBDOHbDPk27wHapzM GR1UAK5/SA7RfZgIMRHHclF6Ea49/uPJ45crD1T+8p6jLW+ldbxpiRD3ux9BnK2v U7EWS5MLMFAvb/nTLc8T37srJuEhBAT0r2bn614rjOiJofalPeD0eDeHfz4vRpPe +qTQREtpBUtJizYN+8rpcxP8f9S/hmPOBvIKD3XC0TlOo1NCf35fqWLWMli2hkTQ AfcY1auKjC/UYcnR0TQ91aHo1puM4fK5Pdc6lDGznrcxy9t1g1NvKAEL9Y3xK3No paglrliBCUbAN8vogKr4jc7jRkh/GLEqkxV2LIpOVp3lyT9GepvYM1xLQ8X/rszn /Il3fAwf5AyP+1RoVcmmOy1XW0ptUbKXWn03NlxN55Ya8x3tKCwWWDSmL2CP8SwV Vo5Qt+rKUkqA/TmHW8HOd7i+44Sa8oD/6WpSSPkwXN2cgRQmvmtaGmpXCKNTn5tk PMgFJOq3uw== =7JDU -----END PGP SIGNATURE----- Merge tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block Pull block fixes from Jens Axboe: "Just a collection of fixes that have been queued up since the initial merge window pull request, the majority of which are targeted for stable as well. One bio_set fix that fixes an issue with the dm adoption of cached bio structs that got introduced in this merge window" * tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block: block: Fix potential deadlock in blk_ia_range_sysfs_show() block: fix bio_clone_blkg_association() to associate with proper blkcg_gq block: remove useless BUG_ON() in blk_mq_put_tag() blk-mq: do not update io_ticks with passthrough requests block: make bioset_exit() fully resilient against being called twice block: use bio_queue_enter instead of blk_queue_enter in bio_poll block: document BLK_STS_AGAIN usage block: take destination bvec offsets into account in bio_copy_data_iter blk-iolatency: Fix inflight count imbalances and IO hangs on offline blk-mq: don't touch ->tagset in blk_mq_get_sq_hctx
This commit is contained in:
Коммит
34845d92bc
|
@ -722,6 +722,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
|
|||
bio_alloc_cache_prune(cache, -1U);
|
||||
}
|
||||
free_percpu(bs->cache);
|
||||
bs->cache = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1366,10 +1367,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
|||
struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
|
||||
struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
|
||||
unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
|
||||
void *src_buf;
|
||||
void *src_buf = bvec_kmap_local(&src_bv);
|
||||
void *dst_buf = bvec_kmap_local(&dst_bv);
|
||||
|
||||
src_buf = bvec_kmap_local(&src_bv);
|
||||
memcpy_to_bvec(&dst_bv, src_buf);
|
||||
memcpy(dst_buf, src_buf, bytes);
|
||||
|
||||
kunmap_local(dst_buf);
|
||||
kunmap_local(src_buf);
|
||||
|
||||
bio_advance_iter_single(src, src_iter, bytes);
|
||||
|
|
|
@ -1974,12 +1974,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
|
|||
*/
|
||||
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
|
||||
{
|
||||
if (src->bi_blkg) {
|
||||
if (dst->bi_blkg)
|
||||
blkg_put(dst->bi_blkg);
|
||||
blkg_get(src->bi_blkg);
|
||||
dst->bi_blkg = src->bi_blkg;
|
||||
}
|
||||
if (src->bi_blkg)
|
||||
bio_associate_blkg_from_css(dst, bio_blkcg_css(src));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
|
||||
|
||||
|
|
|
@ -939,7 +939,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
|
|||
|
||||
blk_flush_plug(current->plug, false);
|
||||
|
||||
if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
|
||||
if (bio_queue_enter(bio))
|
||||
return 0;
|
||||
if (queue_is_mq(q)) {
|
||||
ret = blk_mq_poll(q, cookie, iob, flags);
|
||||
|
|
|
@ -54,13 +54,8 @@ static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj,
|
|||
container_of(attr, struct blk_ia_range_sysfs_entry, attr);
|
||||
struct blk_independent_access_range *iar =
|
||||
container_of(kobj, struct blk_independent_access_range, kobj);
|
||||
ssize_t ret;
|
||||
|
||||
mutex_lock(&iar->queue->sysfs_lock);
|
||||
ret = entry->show(iar, buf);
|
||||
mutex_unlock(&iar->queue->sysfs_lock);
|
||||
|
||||
return ret;
|
||||
return entry->show(iar, buf);
|
||||
}
|
||||
|
||||
static const struct sysfs_ops blk_ia_range_sysfs_ops = {
|
||||
|
|
|
@ -87,7 +87,17 @@ struct iolatency_grp;
|
|||
struct blk_iolatency {
|
||||
struct rq_qos rqos;
|
||||
struct timer_list timer;
|
||||
atomic_t enabled;
|
||||
|
||||
/*
|
||||
* ->enabled is the master enable switch gating the throttling logic and
|
||||
* inflight tracking. The number of cgroups which have iolat enabled is
|
||||
* tracked in ->enable_cnt, and ->enabled is flipped on/off accordingly
|
||||
* from ->enable_work with the request_queue frozen. For details, see
|
||||
* blkiolatency_enable_work_fn().
|
||||
*/
|
||||
bool enabled;
|
||||
atomic_t enable_cnt;
|
||||
struct work_struct enable_work;
|
||||
};
|
||||
|
||||
static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
|
||||
|
@ -95,11 +105,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
|
|||
return container_of(rqos, struct blk_iolatency, rqos);
|
||||
}
|
||||
|
||||
static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
|
||||
{
|
||||
return atomic_read(&blkiolat->enabled) > 0;
|
||||
}
|
||||
|
||||
struct child_latency_info {
|
||||
spinlock_t lock;
|
||||
|
||||
|
@ -464,7 +469,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
|
|||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
bool issue_as_root = bio_issue_as_root_blkg(bio);
|
||||
|
||||
if (!blk_iolatency_enabled(blkiolat))
|
||||
if (!blkiolat->enabled)
|
||||
return;
|
||||
|
||||
while (blkg && blkg->parent) {
|
||||
|
@ -594,7 +599,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
|
|||
u64 window_start;
|
||||
u64 now;
|
||||
bool issue_as_root = bio_issue_as_root_blkg(bio);
|
||||
bool enabled = false;
|
||||
int inflight = 0;
|
||||
|
||||
blkg = bio->bi_blkg;
|
||||
|
@ -605,8 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
|
|||
if (!iolat)
|
||||
return;
|
||||
|
||||
enabled = blk_iolatency_enabled(iolat->blkiolat);
|
||||
if (!enabled)
|
||||
if (!iolat->blkiolat->enabled)
|
||||
return;
|
||||
|
||||
now = ktime_to_ns(ktime_get());
|
||||
|
@ -645,6 +648,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
|
|||
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
|
||||
|
||||
del_timer_sync(&blkiolat->timer);
|
||||
flush_work(&blkiolat->enable_work);
|
||||
blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
|
||||
kfree(blkiolat);
|
||||
}
|
||||
|
@ -716,6 +720,44 @@ next:
|
|||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* blkiolatency_enable_work_fn - Enable or disable iolatency on the device
|
||||
* @work: enable_work of the blk_iolatency of interest
|
||||
*
|
||||
* iolatency needs to keep track of the number of in-flight IOs per cgroup. This
|
||||
* is relatively expensive as it involves walking up the hierarchy twice for
|
||||
* every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
|
||||
* want to disable the in-flight tracking.
|
||||
*
|
||||
* We have to make sure that the counting is balanced - we don't want to leak
|
||||
* the in-flight counts by disabling accounting in the completion path while IOs
|
||||
* are in flight. This is achieved by ensuring that no IO is in flight by
|
||||
* freezing the queue while flipping ->enabled. As this requires a sleepable
|
||||
* context, ->enabled flipping is punted to this work function.
|
||||
*/
|
||||
static void blkiolatency_enable_work_fn(struct work_struct *work)
|
||||
{
|
||||
struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
|
||||
enable_work);
|
||||
bool enabled;
|
||||
|
||||
/*
|
||||
* There can only be one instance of this function running for @blkiolat
|
||||
* and it's guaranteed to be executed at least once after the latest
|
||||
* ->enable_cnt modification. Acting on the latest ->enable_cnt is
|
||||
* sufficient.
|
||||
*
|
||||
* Also, we know @blkiolat is safe to access as ->enable_work is flushed
|
||||
* in blkcg_iolatency_exit().
|
||||
*/
|
||||
enabled = atomic_read(&blkiolat->enable_cnt);
|
||||
if (enabled != blkiolat->enabled) {
|
||||
blk_mq_freeze_queue(blkiolat->rqos.q);
|
||||
blkiolat->enabled = enabled;
|
||||
blk_mq_unfreeze_queue(blkiolat->rqos.q);
|
||||
}
|
||||
}
|
||||
|
||||
int blk_iolatency_init(struct request_queue *q)
|
||||
{
|
||||
struct blk_iolatency *blkiolat;
|
||||
|
@ -741,17 +783,15 @@ int blk_iolatency_init(struct request_queue *q)
|
|||
}
|
||||
|
||||
timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
|
||||
INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
|
||||
* return 0.
|
||||
*/
|
||||
static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
|
||||
static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
|
||||
{
|
||||
struct iolatency_grp *iolat = blkg_to_lat(blkg);
|
||||
struct blk_iolatency *blkiolat = iolat->blkiolat;
|
||||
u64 oldval = iolat->min_lat_nsec;
|
||||
|
||||
iolat->min_lat_nsec = val;
|
||||
|
@ -759,13 +799,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
|
|||
iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
|
||||
BLKIOLATENCY_MAX_WIN_SIZE);
|
||||
|
||||
if (!oldval && val)
|
||||
return 1;
|
||||
if (!oldval && val) {
|
||||
if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
|
||||
schedule_work(&blkiolat->enable_work);
|
||||
}
|
||||
if (oldval && !val) {
|
||||
blkcg_clear_delay(blkg);
|
||||
return -1;
|
||||
if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
|
||||
schedule_work(&blkiolat->enable_work);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void iolatency_clear_scaling(struct blkcg_gq *blkg)
|
||||
|
@ -797,7 +839,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
|||
u64 lat_val = 0;
|
||||
u64 oldval;
|
||||
int ret;
|
||||
int enable = 0;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
|
||||
if (ret)
|
||||
|
@ -832,41 +873,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
|
|||
blkg = ctx.blkg;
|
||||
oldval = iolat->min_lat_nsec;
|
||||
|
||||
enable = iolatency_set_min_lat_nsec(blkg, lat_val);
|
||||
if (enable) {
|
||||
if (!blk_get_queue(blkg->q)) {
|
||||
ret = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
|
||||
blkg_get(blkg);
|
||||
}
|
||||
|
||||
if (oldval != iolat->min_lat_nsec) {
|
||||
iolatency_set_min_lat_nsec(blkg, lat_val);
|
||||
if (oldval != iolat->min_lat_nsec)
|
||||
iolatency_clear_scaling(blkg);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
if (ret == 0 && enable) {
|
||||
struct iolatency_grp *tmp = blkg_to_lat(blkg);
|
||||
struct blk_iolatency *blkiolat = tmp->blkiolat;
|
||||
|
||||
blk_mq_freeze_queue(blkg->q);
|
||||
|
||||
if (enable == 1)
|
||||
atomic_inc(&blkiolat->enabled);
|
||||
else if (enable == -1)
|
||||
atomic_dec(&blkiolat->enabled);
|
||||
else
|
||||
WARN_ON_ONCE(1);
|
||||
|
||||
blk_mq_unfreeze_queue(blkg->q);
|
||||
|
||||
blkg_put(blkg);
|
||||
blk_put_queue(blkg->q);
|
||||
}
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
|
@ -1005,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
|
|||
{
|
||||
struct iolatency_grp *iolat = pd_to_lat(pd);
|
||||
struct blkcg_gq *blkg = lat_to_blkg(iolat);
|
||||
struct blk_iolatency *blkiolat = iolat->blkiolat;
|
||||
int ret;
|
||||
|
||||
ret = iolatency_set_min_lat_nsec(blkg, 0);
|
||||
if (ret == 1)
|
||||
atomic_inc(&blkiolat->enabled);
|
||||
if (ret == -1)
|
||||
atomic_dec(&blkiolat->enabled);
|
||||
iolatency_set_min_lat_nsec(blkg, 0);
|
||||
iolatency_clear_scaling(blkg);
|
||||
}
|
||||
|
||||
|
|
|
@ -228,7 +228,6 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
|
|||
BUG_ON(real_tag >= tags->nr_tags);
|
||||
sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
|
||||
} else {
|
||||
BUG_ON(tag >= tags->nr_reserved_tags);
|
||||
sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -133,7 +133,8 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv,
|
|||
{
|
||||
struct mq_inflight *mi = priv;
|
||||
|
||||
if ((!mi->part->bd_partno || rq->part == mi->part) &&
|
||||
if (rq->part && blk_do_io_stat(rq) &&
|
||||
(!mi->part->bd_partno || rq->part == mi->part) &&
|
||||
blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
|
||||
mi->inflight[rq_data_dir(rq)]++;
|
||||
|
||||
|
@ -2174,8 +2175,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q)
|
|||
*/
|
||||
static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
|
||||
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
|
||||
/*
|
||||
* If the IO scheduler does not respect hardware queues when
|
||||
* dispatching, we just don't bother with multiple HW queues and
|
||||
|
@ -2183,8 +2183,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q)
|
|||
* just causes lock contention inside the scheduler and pointless cache
|
||||
* bouncing.
|
||||
*/
|
||||
hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT,
|
||||
raw_smp_processor_id());
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx);
|
||||
|
||||
if (!blk_mq_hctx_stopped(hctx))
|
||||
return hctx;
|
||||
return NULL;
|
||||
|
|
|
@ -105,6 +105,10 @@ typedef u16 blk_short_t;
|
|||
/* hack for device mapper, don't use elsewhere: */
|
||||
#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11)
|
||||
|
||||
/*
|
||||
* BLK_STS_AGAIN should only be returned if RQF_NOWAIT is set
|
||||
* and the bio would block (cf bio_wouldblock_error())
|
||||
*/
|
||||
#define BLK_STS_AGAIN ((__force blk_status_t)12)
|
||||
|
||||
/*
|
||||
|
|
Загрузка…
Ссылка в новой задаче