for-4.18/block-20180603
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQIcBAABCAAGBQJbFIrHAAoJEPfTWPspceCm2+kQAKo7o7HL30aRxJYu+gYafkuW
PV47zr3e4vhMDEzDaMsh1+V7I7bm3uS+NZu6cFbcV+N9KXFpeb4V4Hvvm5cs+OC3
WCOBi4eC1h4qnDQ3ZyySrCMN+KHYJ16pZqddEjqw+fhVudx8i+F+jz3Y4ZMDDc3q
pArKZvjKh2wEuYXUMFTjaXY46IgPt+er94OwvrhyHk+4AcA+Q/oqSfSdDahUC8jb
BVR3FV4I3NOHUaru0RbrUko13sVZSboWPCIFrlTDz8xXcJOnVHzdVS1WLFDXLHnB
O8q9cADCfa4K08kz68RxykcJiNxNvz5ChDaG0KloCFO+q1tzYRoXLsfaxyuUDg57
Zd93OFZC6hAzXdhclDFIuPET9OQIjDzwphodfKKmDsm3wtyOtydpA0o7JUEongp0
O1gQsEfYOXmQsXlo8Ot+Z7Ne/HvtGZ91JahUa/59edxQbcKaMrktoyQsQ/d1nOEL
4kXID18wPcFHWRQHYXyVuw6kbpRtQnh/U2m1eenSZ7tVQHwoe6mF3cfSf5MMseak
k8nAnmsfEvOL4Ar9ftg61GOrImaQlidxOC2A8fmY5r0Sq/ZldvIFIZizsdTTCcni
8SOTxcQowyqPf5NvMNQ8cKqqCJap3ppj4m7anZNhbypDIF2TmOWsEcXcMDn4y9on
fax14DPLo59gBRiPCn5f
=nga/
-----END PGP SIGNATURE-----

Merge tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - clean up how we pass around gfp_t and blk_mq_req_flags_t (Christoph)
 - prepare us to defer scheduler attach (Christoph)
 - clean up drivers handling of bounce buffers (Christoph)
 - fix timeout handling corner cases (Christoph/Bart/Keith)
 - bcache fixes (Coly)
 - prep work for bcachefs and some block layer optimizations (Kent).
 - convert users of bio_sets to using embedded structs (Kent).
 - fixes for the BFQ io scheduler (Paolo/Davide/Filippo)
 - lightnvm fixes and improvements (Matias, with contributions from
   Hans and Javier)
 - adding discard throttling to blk-wbt (me)
 - sbitmap blk-mq-tag handling (me/Omar/Ming).
 - remove the sparc jsflash block driver, acked by DaveM.
 - Kyber scheduler improvement from Jianchao, making it more friendly
   wrt merging.
 - conversion of symbolic proc permissions to octal, from Joe Perches.
   Previously the block parts were a mix of both.
 - nbd fixes (Josef and Kevin Vigor)
 - unify how we handle the various kinds of timestamps that the block
   core and utility code uses (Omar)
 - three NVMe pull requests from Keith and Christoph, bringing AEN to
   feature completeness, file backed namespaces, cq/sq lock split, and
   various fixes
 - various little fixes and improvements all over the map

* tag 'for-4.18/block-20180603' of git://git.kernel.dk/linux-block: (196 commits)
  blk-mq: update nr_requests when switching to 'none' scheduler
  block: don't use blocking queue entered for recursive bio submits
  dm-crypt: fix warning in shutdown path
  lightnvm: pblk: take bitmap alloc. out of critical section
  lightnvm: pblk: kick writer on new flush points
  lightnvm: pblk: only try to recover lines with written smeta
  lightnvm: pblk: remove unnecessary bio_get/put
  lightnvm: pblk: add possibility to set write buffer size manually
  lightnvm: fix partial read error path
  lightnvm: proper error handling for pblk_bio_add_pages
  lightnvm: pblk: fix smeta write error path
  lightnvm: pblk: garbage collect lines with failed writes
  lightnvm: pblk: rework write error recovery path
  lightnvm: pblk: remove dead function
  lightnvm: pass flag on graceful teardown to targets
  lightnvm: pblk: check for chunk size before allocating it
  lightnvm: pblk: remove unnecessary argument
  lightnvm: pblk: remove unnecessary indirection
  lightnvm: pblk: return NVM_ error on failed submission
  lightnvm: pblk: warn in case of corrupted write buffer
  ...
commit f459c34538
@@ -71,13 +71,16 @@ use_per_node_hctx=[0/1]: Default: 0
  1: The multi-queue block layer is instantiated with a hardware dispatch
     queue for each CPU node in the system.

use_lightnvm=[0/1]: Default: 0
  Register device with LightNVM. Requires blk-mq and CONFIG_NVM to be enabled.

no_sched=[0/1]: Default: 0
  0: nullb* use default blk-mq io scheduler.
  1: nullb* doesn't use io scheduler.

blocking=[0/1]: Default: 0
  0: Register as a non-blocking blk-mq driver device.
  1: Register as a blocking blk-mq driver device, null_blk will set
     the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
     needs to block in its ->queue_rq() function.

shared_tags=[0/1]: Default: 0
  0: Tag set is not shared.
  1: Tag set shared between devices for blk-mq. Only makes sense with
@@ -82,24 +82,13 @@ function
 1. invokes optional hostt->eh_timed_out() callback. Return value can
    be one of

    - BLK_EH_HANDLED
      This indicates that eh_timed_out() dealt with the timeout.
      The command is passed back to the block layer and completed
      via __blk_complete_requests().

      *NOTE* After returning BLK_EH_HANDLED the SCSI layer is
      assumed to be finished with the command, and no other
      functions from the SCSI layer will be called. So this
      should typically only be returned if the eh_timed_out()
      handler raced with normal completion.

    - BLK_EH_RESET_TIMER
      This indicates that more time is required to finish the
      command. Timer is restarted. This action is counted as a
      retry and only allowed scmd->allowed + 1(!) times. Once the
      limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
      limit is reached, action for BLK_EH_DONE is taken instead.

    - BLK_EH_NOT_HANDLED
    - BLK_EH_DONE
      eh_timed_out() callback did not handle the command.
      Step #2 is taken.
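The hunk above documents the renaming of BLK_EH_NOT_HANDLED to BLK_EH_DONE (and the retirement of BLK_EH_HANDLED). For orientation, a minimal, hedged sketch of a low-level driver timeout hook under the new convention; the my_* names and the my_cmnd_in_flight() helper are hypothetical and not part of this series:

	#include <scsi/scsi_cmnd.h>
	#include <scsi/scsi_host.h>

	static enum blk_eh_timer_return my_eh_timed_out(struct scsi_cmnd *scmd)
	{
		/* Hardware is still working on the command: ask for more time. */
		if (my_cmnd_in_flight(scmd))
			return BLK_EH_RESET_TIMER;

		/*
		 * Nothing handled here; returning BLK_EH_DONE lets the
		 * timeout path proceed to step #2 (normal SCSI EH).
		 */
		return BLK_EH_DONE;
	}

	static struct scsi_host_template my_sht = {
		.name		= "my_hba",
		.eh_timed_out	= my_eh_timed_out,
		/* ... */
	};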
@@ -9700,7 +9700,7 @@ S: Maintained
F:	drivers/net/ethernet/netronome/

NETWORK BLOCK DEVICE (NBD)
M:	Josef Bacik <jbacik@fb.com>
M:	Josef Bacik <josef@toxicpanda.com>
S:	Maintained
L:	linux-block@vger.kernel.org
L:	nbd@other.debian.org
@@ -1,40 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * jsflash.h: OS Flash SIMM support for JavaStations.
 *
 * Copyright (C) 1999 Pete Zaitcev
 */

#ifndef _SPARC_JSFLASH_H
#define _SPARC_JSFLASH_H

#ifndef _SPARC_TYPES_H
#include <linux/types.h>
#endif

/*
 * Semantics of the offset is a full address.
 * Hardcode it or get it from probe ioctl.
 *
 * We use full bus address, so that we would be
 * automatically compatible with possible future systems.
 */

#define JSFLASH_IDENT   (('F'<<8)|54)
struct jsflash_ident_arg {
	__u64 off;		/* 0x20000000 is included */
	__u32 size;
	char name[32];		/* With trailing zero */
};

#define JSFLASH_ERASE   (('F'<<8)|55)
/* Put 0 as argument, may be flags or sector number... */

#define JSFLASH_PROGRAM (('F'<<8)|56)
struct jsflash_program_arg {
	__u64 data;		/* char* for sparc and sparc64 */
	__u64 off;
	__u32 size;
};

#endif /* _SPARC_JSFLASH_H */
@@ -55,13 +55,13 @@ BFQG_FLAG_FNS(empty)
/* This should be called with the scheduler lock held. */
static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
	unsigned long long now;
	u64 now;

	if (!bfqg_stats_waiting(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_group_wait_time))
	now = ktime_get_ns();
	if (now > stats->start_group_wait_time)
		blkg_stat_add(&stats->group_wait_time,
			      now - stats->start_group_wait_time);
	bfqg_stats_clear_waiting(stats);

@@ -77,20 +77,20 @@ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
		return;
	if (bfqg == curr_bfqg)
		return;
	stats->start_group_wait_time = sched_clock();
	stats->start_group_wait_time = ktime_get_ns();
	bfqg_stats_mark_waiting(stats);
}

/* This should be called with the scheduler lock held. */
static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
	unsigned long long now;
	u64 now;

	if (!bfqg_stats_empty(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_empty_time))
	now = ktime_get_ns();
	if (now > stats->start_empty_time)
		blkg_stat_add(&stats->empty_time,
			      now - stats->start_empty_time);
	bfqg_stats_clear_empty(stats);

@@ -116,7 +116,7 @@ void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
	if (bfqg_stats_empty(stats))
		return;

	stats->start_empty_time = sched_clock();
	stats->start_empty_time = ktime_get_ns();
	bfqg_stats_mark_empty(stats);
}

@@ -125,9 +125,9 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_idling(stats)) {
		unsigned long long now = sched_clock();
		u64 now = ktime_get_ns();

		if (time_after64(now, stats->start_idle_time))
		if (now > stats->start_idle_time)
			blkg_stat_add(&stats->idle_time,
				      now - stats->start_idle_time);
		bfqg_stats_clear_idling(stats);

@@ -138,7 +138,7 @@ void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	stats->start_idle_time = sched_clock();
	stats->start_idle_time = ktime_get_ns();
	bfqg_stats_mark_idling(stats);
}

@@ -171,18 +171,18 @@ void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}

void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
				  uint64_t io_start_time, unsigned int op)
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op)
{
	struct bfqg_stats *stats = &bfqg->stats;
	unsigned long long now = sched_clock();
	u64 now = ktime_get_ns();

	if (time_after64(now, io_start_time))
	if (now > io_start_time_ns)
		blkg_rwstat_add(&stats->service_time, op,
				now - io_start_time);
	if (time_after64(io_start_time, start_time))
				now - io_start_time_ns);
	if (io_start_time_ns > start_time_ns)
		blkg_rwstat_add(&stats->wait_time, op,
				io_start_time - start_time);
				io_start_time_ns - start_time_ns);
}

#else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */

@@ -191,8 +191,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
				  uint64_t io_start_time, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
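Taken together, these hunks switch the bfqg stats timestamps from sched_clock()/unsigned long long to ktime_get_ns()/u64, and consequently replace the wrap-aware time_after64() checks with plain comparisons: ktime_get_ns() returns a monotonically increasing nanosecond count, so no jiffies-style wraparound handling is needed. Assembled from the interleaved old/new lines above, the new version of one helper reads roughly as follows (a reconstruction for readability, not a verbatim copy of the tree):

	static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
	{
		u64 now;

		if (!bfqg_stats_waiting(stats))
			return;

		now = ktime_get_ns();		/* monotonic, nanoseconds */
		if (now > stats->start_group_wait_time)
			blkg_stat_add(&stats->group_wait_time,
				      now - stats->start_group_wait_time);
		bfqg_stats_clear_waiting(stats);
	}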
@ -49,9 +49,39 @@
|
|||
*
|
||||
* In particular, to provide these low-latency guarantees, BFQ
|
||||
* explicitly privileges the I/O of two classes of time-sensitive
|
||||
* applications: interactive and soft real-time. This feature enables
|
||||
* BFQ to provide applications in these classes with a very low
|
||||
* latency. Finally, BFQ also features additional heuristics for
|
||||
* applications: interactive and soft real-time. In more detail, BFQ
|
||||
* behaves this way if the low_latency parameter is set (default
|
||||
* configuration). This feature enables BFQ to provide applications in
|
||||
* these classes with a very low latency.
|
||||
*
|
||||
* To implement this feature, BFQ constantly tries to detect whether
|
||||
* the I/O requests in a bfq_queue come from an interactive or a soft
|
||||
* real-time application. For brevity, in these cases, the queue is
|
||||
* said to be interactive or soft real-time. In both cases, BFQ
|
||||
* privileges the service of the queue, over that of non-interactive
|
||||
* and non-soft-real-time queues. This privileging is performed,
|
||||
* mainly, by raising the weight of the queue. So, for brevity, we
|
||||
* call just weight-raising periods the time periods during which a
|
||||
* queue is privileged, because deemed interactive or soft real-time.
|
||||
*
|
||||
* The detection of soft real-time queues/applications is described in
|
||||
* detail in the comments on the function
|
||||
* bfq_bfqq_softrt_next_start. On the other hand, the detection of an
|
||||
* interactive queue works as follows: a queue is deemed interactive
|
||||
* if it is constantly non empty only for a limited time interval,
|
||||
* after which it does become empty. The queue may be deemed
|
||||
* interactive again (for a limited time), if it restarts being
|
||||
* constantly non empty, provided that this happens only after the
|
||||
* queue has remained empty for a given minimum idle time.
|
||||
*
|
||||
* By default, BFQ computes automatically the above maximum time
|
||||
* interval, i.e., the time interval after which a constantly
|
||||
* non-empty queue stops being deemed interactive. Since a queue is
|
||||
* weight-raised while it is deemed interactive, this maximum time
|
||||
* interval happens to coincide with the (maximum) duration of the
|
||||
* weight-raising for interactive queues.
|
||||
*
|
||||
* Finally, BFQ also features additional heuristics for
|
||||
* preserving both a low latency and a high throughput on NCQ-capable,
|
||||
* rotational or flash-based devices, and to get the job done quickly
|
||||
* for applications consisting in many I/O-bound processes.
|
||||
|
@ -61,14 +91,14 @@
|
|||
* all low-latency heuristics for that device, by setting low_latency
|
||||
* to 0.
|
||||
*
|
||||
* BFQ is described in [1], where also a reference to the initial, more
|
||||
* theoretical paper on BFQ can be found. The interested reader can find
|
||||
* in the latter paper full details on the main algorithm, as well as
|
||||
* formulas of the guarantees and formal proofs of all the properties.
|
||||
* With respect to the version of BFQ presented in these papers, this
|
||||
* implementation adds a few more heuristics, such as the one that
|
||||
* guarantees a low latency to soft real-time applications, and a
|
||||
* hierarchical extension based on H-WF2Q+.
|
||||
* BFQ is described in [1], where also a reference to the initial,
|
||||
* more theoretical paper on BFQ can be found. The interested reader
|
||||
* can find in the latter paper full details on the main algorithm, as
|
||||
* well as formulas of the guarantees and formal proofs of all the
|
||||
* properties. With respect to the version of BFQ presented in these
|
||||
* papers, this implementation adds a few more heuristics, such as the
|
||||
* ones that guarantee a low latency to interactive and soft real-time
|
||||
* applications, and a hierarchical extension based on H-WF2Q+.
|
||||
*
|
||||
* B-WF2Q+ is based on WF2Q+, which is described in [2], together with
|
||||
* H-WF2Q+, while the augmented tree used here to implement B-WF2Q+
|
||||
|
@ -218,56 +248,46 @@ static struct kmem_cache *bfq_pool;
|
|||
#define BFQ_RATE_SHIFT 16
|
||||
|
||||
/*
|
||||
* By default, BFQ computes the duration of the weight raising for
|
||||
* interactive applications automatically, using the following formula:
|
||||
* duration = (R / r) * T, where r is the peak rate of the device, and
|
||||
* R and T are two reference parameters.
|
||||
* In particular, R is the peak rate of the reference device (see
|
||||
* below), and T is a reference time: given the systems that are
|
||||
* likely to be installed on the reference device according to its
|
||||
* speed class, T is about the maximum time needed, under BFQ and
|
||||
* while reading two files in parallel, to load typical large
|
||||
* applications on these systems (see the comments on
|
||||
* max_service_from_wr below, for more details on how T is obtained).
|
||||
* In practice, the slower/faster the device at hand is, the more/less
|
||||
* it takes to load applications with respect to the reference device.
|
||||
* Accordingly, the longer/shorter BFQ grants weight raising to
|
||||
* interactive applications.
|
||||
* When configured for computing the duration of the weight-raising
|
||||
* for interactive queues automatically (see the comments at the
|
||||
* beginning of this file), BFQ does it using the following formula:
|
||||
* duration = (ref_rate / r) * ref_wr_duration,
|
||||
* where r is the peak rate of the device, and ref_rate and
|
||||
* ref_wr_duration are two reference parameters. In particular,
|
||||
* ref_rate is the peak rate of the reference storage device (see
|
||||
* below), and ref_wr_duration is about the maximum time needed, with
|
||||
* BFQ and while reading two files in parallel, to load typical large
|
||||
* applications on the reference device (see the comments on
|
||||
* max_service_from_wr below, for more details on how ref_wr_duration
|
||||
* is obtained). In practice, the slower/faster the device at hand
|
||||
* is, the more/less it takes to load applications with respect to the
|
||||
* reference device. Accordingly, the longer/shorter BFQ grants
|
||||
* weight raising to interactive applications.
|
||||
*
|
||||
* BFQ uses four different reference pairs (R, T), depending on:
|
||||
* . whether the device is rotational or non-rotational;
|
||||
* . whether the device is slow, such as old or portable HDDs, as well as
|
||||
* SD cards, or fast, such as newer HDDs and SSDs.
|
||||
* BFQ uses two different reference pairs (ref_rate, ref_wr_duration),
|
||||
* depending on whether the device is rotational or non-rotational.
|
||||
*
|
||||
* The device's speed class is dynamically (re)detected in
|
||||
* bfq_update_peak_rate() every time the estimated peak rate is updated.
|
||||
* In the following definitions, ref_rate[0] and ref_wr_duration[0]
|
||||
* are the reference values for a rotational device, whereas
|
||||
* ref_rate[1] and ref_wr_duration[1] are the reference values for a
|
||||
* non-rotational device. The reference rates are not the actual peak
|
||||
* rates of the devices used as a reference, but slightly lower
|
||||
* values. The reason for using slightly lower values is that the
|
||||
* peak-rate estimator tends to yield slightly lower values than the
|
||||
* actual peak rate (it can yield the actual peak rate only if there
|
||||
* is only one process doing I/O, and the process does sequential
|
||||
* I/O).
|
||||
*
|
||||
* In the following definitions, R_slow[0]/R_fast[0] and
|
||||
* T_slow[0]/T_fast[0] are the reference values for a slow/fast
|
||||
* rotational device, whereas R_slow[1]/R_fast[1] and
|
||||
* T_slow[1]/T_fast[1] are the reference values for a slow/fast
|
||||
* non-rotational device. Finally, device_speed_thresh are the
|
||||
* thresholds used to switch between speed classes. The reference
|
||||
* rates are not the actual peak rates of the devices used as a
|
||||
* reference, but slightly lower values. The reason for using these
|
||||
* slightly lower values is that the peak-rate estimator tends to
|
||||
* yield slightly lower values than the actual peak rate (it can yield
|
||||
* the actual peak rate only if there is only one process doing I/O,
|
||||
* and the process does sequential I/O).
|
||||
*
|
||||
* Both the reference peak rates and the thresholds are measured in
|
||||
* sectors/usec, left-shifted by BFQ_RATE_SHIFT.
|
||||
* The reference peak rates are measured in sectors/usec, left-shifted
|
||||
* by BFQ_RATE_SHIFT.
|
||||
*/
|
||||
static int R_slow[2] = {1000, 10700};
|
||||
static int R_fast[2] = {14000, 33000};
|
||||
static int ref_rate[2] = {14000, 33000};
|
||||
/*
|
||||
* To improve readability, a conversion function is used to initialize the
|
||||
* following arrays, which entails that they can be initialized only in a
|
||||
* function.
|
||||
* To improve readability, a conversion function is used to initialize
|
||||
* the following array, which entails that the array can be
|
||||
* initialized only in a function.
|
||||
*/
|
||||
static int T_slow[2];
|
||||
static int T_fast[2];
|
||||
static int device_speed_thresh[2];
|
||||
static int ref_wr_duration[2];
|
||||
|
||||
/*
|
||||
* BFQ uses the above-detailed, time-based weight-raising mechanism to
|
||||
|
@ -486,46 +506,6 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* See the comments on bfq_limit_depth for the purpose of
|
||||
* the depths set in the function.
|
||||
*/
|
||||
static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
|
||||
{
|
||||
bfqd->sb_shift = bt->sb.shift;
|
||||
|
||||
/*
|
||||
* In-word depths if no bfq_queue is being weight-raised:
|
||||
* leaving 25% of tags only for sync reads.
|
||||
*
|
||||
* In next formulas, right-shift the value
|
||||
* (1U<<bfqd->sb_shift), instead of computing directly
|
||||
* (1U<<(bfqd->sb_shift - something)), to be robust against
|
||||
* any possible value of bfqd->sb_shift, without having to
|
||||
* limit 'something'.
|
||||
*/
|
||||
/* no more than 50% of tags for async I/O */
|
||||
bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U);
|
||||
/*
|
||||
* no more than 75% of tags for sync writes (25% extra tags
|
||||
* w.r.t. async I/O, to prevent async I/O from starving sync
|
||||
* writes)
|
||||
*/
|
||||
bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U);
|
||||
|
||||
/*
|
||||
* In-word depths in case some bfq_queue is being weight-
|
||||
* raised: leaving ~63% of tags for sync reads. This is the
|
||||
* highest percentage for which, in our tests, application
|
||||
* start-up times didn't suffer from any regression due to tag
|
||||
* shortage.
|
||||
*/
|
||||
/* no more than ~18% of tags for async I/O */
|
||||
bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U);
|
||||
/* no more than ~37% of tags for sync writes (~20% extra tags) */
|
||||
bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U);
|
||||
}
|
||||
|
||||
/*
|
||||
* Async I/O can easily starve sync I/O (both sync reads and sync
|
||||
* writes), by consuming all tags. Similarly, storms of sync writes,
|
||||
|
@ -535,25 +515,11 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
|
|||
*/
|
||||
static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
|
||||
{
|
||||
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
|
||||
struct bfq_data *bfqd = data->q->elevator->elevator_data;
|
||||
struct sbitmap_queue *bt;
|
||||
|
||||
if (op_is_sync(op) && !op_is_write(op))
|
||||
return;
|
||||
|
||||
if (data->flags & BLK_MQ_REQ_RESERVED) {
|
||||
if (unlikely(!tags->nr_reserved_tags)) {
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
}
|
||||
bt = &tags->breserved_tags;
|
||||
} else
|
||||
bt = &tags->bitmap_tags;
|
||||
|
||||
if (unlikely(bfqd->sb_shift != bt->sb.shift))
|
||||
bfq_update_depths(bfqd, bt);
|
||||
|
||||
data->shallow_depth =
|
||||
bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
|
||||
|
||||
|
@ -906,26 +872,30 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
|
|||
if (bfqd->bfq_wr_max_time > 0)
|
||||
return bfqd->bfq_wr_max_time;
|
||||
|
||||
dur = bfqd->RT_prod;
|
||||
dur = bfqd->rate_dur_prod;
|
||||
do_div(dur, bfqd->peak_rate);
|
||||
|
||||
/*
|
||||
* Limit duration between 3 and 13 seconds. Tests show that
|
||||
* higher values than 13 seconds often yield the opposite of
|
||||
* the desired result, i.e., worsen responsiveness by letting
|
||||
* non-interactive and non-soft-real-time applications
|
||||
* preserve weight raising for a too long time interval.
|
||||
* Limit duration between 3 and 25 seconds. The upper limit
|
||||
* has been conservatively set after the following worst case:
|
||||
* on a QEMU/KVM virtual machine
|
||||
* - running in a slow PC
|
||||
* - with a virtual disk stacked on a slow low-end 5400rpm HDD
|
||||
* - serving a heavy I/O workload, such as the sequential reading
|
||||
* of several files
|
||||
* mplayer took 23 seconds to start, if constantly weight-raised.
|
||||
*
|
||||
* As for higher values than that accomodating the above bad
|
||||
* scenario, tests show that higher values would often yield
|
||||
* the opposite of the desired result, i.e., would worsen
|
||||
* responsiveness by allowing non-interactive applications to
|
||||
* preserve weight raising for too long.
|
||||
*
|
||||
* On the other end, lower values than 3 seconds make it
|
||||
* difficult for most interactive tasks to complete their jobs
|
||||
* before weight-raising finishes.
|
||||
*/
|
||||
if (dur > msecs_to_jiffies(13000))
|
||||
dur = msecs_to_jiffies(13000);
|
||||
else if (dur < msecs_to_jiffies(3000))
|
||||
dur = msecs_to_jiffies(3000);
|
||||
|
||||
return dur;
|
||||
return clamp_val(dur, msecs_to_jiffies(3000), msecs_to_jiffies(25000));
|
||||
}
|
||||
|
||||
/* switch back from soft real-time to interactive weight raising */
|
||||
|
@ -1392,15 +1362,6 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
|
|||
return wr_or_deserves_wr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the farthest future time instant according to jiffies
|
||||
* macros.
|
||||
*/
|
||||
static unsigned long bfq_greatest_from_now(void)
|
||||
{
|
||||
return jiffies + MAX_JIFFY_OFFSET;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the farthest past time instant according to jiffies
|
||||
* macros.
|
||||
|
@ -1545,7 +1506,8 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
|
|||
in_burst = bfq_bfqq_in_large_burst(bfqq);
|
||||
soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
|
||||
!in_burst &&
|
||||
time_is_before_jiffies(bfqq->soft_rt_next_start);
|
||||
time_is_before_jiffies(bfqq->soft_rt_next_start) &&
|
||||
bfqq->dispatched == 0;
|
||||
*interactive = !in_burst && idle_for_long_time;
|
||||
wr_or_deserves_wr = bfqd->low_latency &&
|
||||
(bfqq->wr_coeff > 1 ||
|
||||
|
@ -1858,6 +1820,8 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
|
|||
return ELEVATOR_NO_MERGE;
|
||||
}
|
||||
|
||||
static struct bfq_queue *bfq_init_rq(struct request *rq);
|
||||
|
||||
static void bfq_request_merged(struct request_queue *q, struct request *req,
|
||||
enum elv_merge type)
|
||||
{
|
||||
|
@ -1866,7 +1830,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
|
|||
blk_rq_pos(req) <
|
||||
blk_rq_pos(container_of(rb_prev(&req->rb_node),
|
||||
struct request, rb_node))) {
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(req);
|
||||
struct bfq_queue *bfqq = bfq_init_rq(req);
|
||||
struct bfq_data *bfqd = bfqq->bfqd;
|
||||
struct request *prev, *next_rq;
|
||||
|
||||
|
@ -1891,14 +1855,25 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called to notify the scheduler that the requests
|
||||
* rq and 'next' have been merged, with 'next' going away. BFQ
|
||||
* exploits this hook to address the following issue: if 'next' has a
|
||||
* fifo_time lower that rq, then the fifo_time of rq must be set to
|
||||
* the value of 'next', to not forget the greater age of 'next'.
|
||||
*
|
||||
* NOTE: in this function we assume that rq is in a bfq_queue, basing
|
||||
* on that rq is picked from the hash table q->elevator->hash, which,
|
||||
* in its turn, is filled only with I/O requests present in
|
||||
* bfq_queues, while BFQ is in use for the request queue q. In fact,
|
||||
* the function that fills this hash table (elv_rqhash_add) is called
|
||||
* only by bfq_insert_request.
|
||||
*/
|
||||
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
|
||||
struct request *next)
|
||||
{
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
|
||||
|
||||
if (!RB_EMPTY_NODE(&rq->rb_node))
|
||||
goto end;
|
||||
spin_lock_irq(&bfqq->bfqd->lock);
|
||||
struct bfq_queue *bfqq = bfq_init_rq(rq),
|
||||
*next_bfqq = bfq_init_rq(next);
|
||||
|
||||
/*
|
||||
* If next and rq belong to the same bfq_queue and next is older
|
||||
|
@ -1920,11 +1895,6 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
|
|||
if (bfqq->next_rq == next)
|
||||
bfqq->next_rq = rq;
|
||||
|
||||
bfq_remove_request(q, next);
|
||||
bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
|
||||
|
||||
spin_unlock_irq(&bfqq->bfqd->lock);
|
||||
end:
|
||||
bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
|
||||
}
|
||||
|
||||
|
@ -2506,37 +2476,15 @@ static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
|
|||
/*
|
||||
* Update parameters related to throughput and responsiveness, as a
|
||||
* function of the estimated peak rate. See comments on
|
||||
* bfq_calc_max_budget(), and on T_slow and T_fast arrays.
|
||||
* bfq_calc_max_budget(), and on the ref_wr_duration array.
|
||||
*/
|
||||
static void update_thr_responsiveness_params(struct bfq_data *bfqd)
|
||||
{
|
||||
int dev_type = blk_queue_nonrot(bfqd->queue);
|
||||
|
||||
if (bfqd->bfq_user_max_budget == 0)
|
||||
if (bfqd->bfq_user_max_budget == 0) {
|
||||
bfqd->bfq_max_budget =
|
||||
bfq_calc_max_budget(bfqd);
|
||||
|
||||
if (bfqd->device_speed == BFQ_BFQD_FAST &&
|
||||
bfqd->peak_rate < device_speed_thresh[dev_type]) {
|
||||
bfqd->device_speed = BFQ_BFQD_SLOW;
|
||||
bfqd->RT_prod = R_slow[dev_type] *
|
||||
T_slow[dev_type];
|
||||
} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
|
||||
bfqd->peak_rate > device_speed_thresh[dev_type]) {
|
||||
bfqd->device_speed = BFQ_BFQD_FAST;
|
||||
bfqd->RT_prod = R_fast[dev_type] *
|
||||
T_fast[dev_type];
|
||||
bfq_log(bfqd, "new max_budget = %d", bfqd->bfq_max_budget);
|
||||
}
|
||||
|
||||
bfq_log(bfqd,
|
||||
"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec",
|
||||
dev_type == 0 ? "ROT" : "NONROT",
|
||||
bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
|
||||
bfqd->device_speed == BFQ_BFQD_FAST ?
|
||||
(USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
|
||||
(USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
|
||||
(USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
|
||||
BFQ_RATE_SHIFT);
|
||||
}
|
||||
|
||||
static void bfq_reset_rate_computation(struct bfq_data *bfqd,
|
||||
|
@ -3265,23 +3213,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
|
|||
bfqq->soft_rt_next_start =
|
||||
bfq_bfqq_softrt_next_start(bfqd, bfqq);
|
||||
else {
|
||||
/*
|
||||
* The application is still waiting for the
|
||||
* completion of one or more requests:
|
||||
* prevent it from possibly being incorrectly
|
||||
* deemed as soft real-time by setting its
|
||||
* soft_rt_next_start to infinity. In fact,
|
||||
* without this assignment, the application
|
||||
* would be incorrectly deemed as soft
|
||||
* real-time if:
|
||||
* 1) it issued a new request before the
|
||||
* completion of all its in-flight
|
||||
* requests, and
|
||||
* 2) at that time, its soft_rt_next_start
|
||||
* happened to be in the past.
|
||||
*/
|
||||
bfqq->soft_rt_next_start =
|
||||
bfq_greatest_from_now();
|
||||
/*
|
||||
* Schedule an update of soft_rt_next_start to when
|
||||
* the task may be discovered to be isochronous.
|
||||
|
@ -4540,14 +4471,12 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
|
|||
unsigned int cmd_flags) {}
|
||||
#endif
|
||||
|
||||
static void bfq_prepare_request(struct request *rq, struct bio *bio);
|
||||
|
||||
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
bool at_head)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct bfq_data *bfqd = q->elevator->elevator_data;
|
||||
struct bfq_queue *bfqq = RQ_BFQQ(rq);
|
||||
struct bfq_queue *bfqq;
|
||||
bool idle_timer_disabled = false;
|
||||
unsigned int cmd_flags;
|
||||
|
||||
|
@ -4562,24 +4491,13 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
|||
blk_mq_sched_request_inserted(rq);
|
||||
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
bfqq = bfq_init_rq(rq);
|
||||
if (at_head || blk_rq_is_passthrough(rq)) {
|
||||
if (at_head)
|
||||
list_add(&rq->queuelist, &bfqd->dispatch);
|
||||
else
|
||||
list_add_tail(&rq->queuelist, &bfqd->dispatch);
|
||||
} else {
|
||||
if (WARN_ON_ONCE(!bfqq)) {
|
||||
/*
|
||||
* This should never happen. Most likely rq is
|
||||
* a requeued regular request, being
|
||||
* re-inserted without being first
|
||||
* re-prepared. Do a prepare, to avoid
|
||||
* failure.
|
||||
*/
|
||||
bfq_prepare_request(rq, rq->bio);
|
||||
bfqq = RQ_BFQQ(rq);
|
||||
}
|
||||
|
||||
} else { /* bfqq is assumed to be non null here */
|
||||
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
|
||||
/*
|
||||
* Update bfqq, because, if a queue merge has occurred
|
||||
|
@ -4778,8 +4696,8 @@ static void bfq_finish_requeue_request(struct request *rq)
|
|||
|
||||
if (rq->rq_flags & RQF_STARTED)
|
||||
bfqg_stats_update_completion(bfqq_group(bfqq),
|
||||
rq_start_time_ns(rq),
|
||||
rq_io_start_time_ns(rq),
|
||||
rq->start_time_ns,
|
||||
rq->io_start_time_ns,
|
||||
rq->cmd_flags);
|
||||
|
||||
if (likely(rq->rq_flags & RQF_STARTED)) {
|
||||
|
@ -4922,11 +4840,48 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
|
|||
}
|
||||
|
||||
/*
|
||||
* Allocate bfq data structures associated with this request.
|
||||
* Only reset private fields. The actual request preparation will be
|
||||
* performed by bfq_init_rq, when rq is either inserted or merged. See
|
||||
* comments on bfq_init_rq for the reason behind this delayed
|
||||
* preparation.
|
||||
*/
|
||||
static void bfq_prepare_request(struct request *rq, struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* Regardless of whether we have an icq attached, we have to
|
||||
* clear the scheduler pointers, as they might point to
|
||||
* previously allocated bic/bfqq structs.
|
||||
*/
|
||||
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If needed, init rq, allocate bfq data structures associated with
|
||||
* rq, and increment reference counters in the destination bfq_queue
|
||||
* for rq. Return the destination bfq_queue for rq, or NULL is rq is
|
||||
* not associated with any bfq_queue.
|
||||
*
|
||||
* This function is invoked by the functions that perform rq insertion
|
||||
* or merging. One may have expected the above preparation operations
|
||||
* to be performed in bfq_prepare_request, and not delayed to when rq
|
||||
* is inserted or merged. The rationale behind this delayed
|
||||
* preparation is that, after the prepare_request hook is invoked for
|
||||
* rq, rq may still be transformed into a request with no icq, i.e., a
|
||||
* request not associated with any queue. No bfq hook is invoked to
|
||||
* signal this tranformation. As a consequence, should these
|
||||
* preparation operations be performed when the prepare_request hook
|
||||
* is invoked, and should rq be transformed one moment later, bfq
|
||||
* would end up in an inconsistent state, because it would have
|
||||
* incremented some queue counters for an rq destined to
|
||||
* transformation, without any chance to correctly lower these
|
||||
* counters back. In contrast, no transformation can still happen for
|
||||
* rq after rq has been inserted or merged. So, it is safe to execute
|
||||
* these preparation operations when rq is finally inserted or merged.
|
||||
*/
|
||||
static struct bfq_queue *bfq_init_rq(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct bio *bio = rq->bio;
|
||||
struct bfq_data *bfqd = q->elevator->elevator_data;
|
||||
struct bfq_io_cq *bic;
|
||||
const int is_sync = rq_is_sync(rq);
|
||||
|
@ -4934,20 +4889,21 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
|
|||
bool new_queue = false;
|
||||
bool bfqq_already_existing = false, split = false;
|
||||
|
||||
if (unlikely(!rq->elv.icq))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Even if we don't have an icq attached, we should still clear
|
||||
* the scheduler pointers, as they might point to previously
|
||||
* allocated bic/bfqq structs.
|
||||
* Assuming that elv.priv[1] is set only if everything is set
|
||||
* for this rq. This holds true, because this function is
|
||||
* invoked only for insertion or merging, and, after such
|
||||
* events, a request cannot be manipulated any longer before
|
||||
* being removed from bfq.
|
||||
*/
|
||||
if (!rq->elv.icq) {
|
||||
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
|
||||
return;
|
||||
}
|
||||
if (rq->elv.priv[1])
|
||||
return rq->elv.priv[1];
|
||||
|
||||
bic = icq_to_bic(rq->elv.icq);
|
||||
|
||||
spin_lock_irq(&bfqd->lock);
|
||||
|
||||
bfq_check_ioprio_change(bic, bio);
|
||||
|
||||
bfq_bic_update_cgroup(bic, bio);
|
||||
|
@ -5006,7 +4962,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
|
|||
if (unlikely(bfq_bfqq_just_created(bfqq)))
|
||||
bfq_handle_burst(bfqd, bfqq);
|
||||
|
||||
spin_unlock_irq(&bfqd->lock);
|
||||
return bfqq;
|
||||
}
|
||||
|
||||
static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
|
||||
|
@ -5105,6 +5061,64 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
|
|||
__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
|
||||
}
|
||||
|
||||
/*
|
||||
* See the comments on bfq_limit_depth for the purpose of
|
||||
* the depths set in the function. Return minimum shallow depth we'll use.
|
||||
*/
|
||||
static unsigned int bfq_update_depths(struct bfq_data *bfqd,
|
||||
struct sbitmap_queue *bt)
|
||||
{
|
||||
unsigned int i, j, min_shallow = UINT_MAX;
|
||||
|
||||
/*
|
||||
* In-word depths if no bfq_queue is being weight-raised:
|
||||
* leaving 25% of tags only for sync reads.
|
||||
*
|
||||
* In next formulas, right-shift the value
|
||||
* (1U<<bt->sb.shift), instead of computing directly
|
||||
* (1U<<(bt->sb.shift - something)), to be robust against
|
||||
* any possible value of bt->sb.shift, without having to
|
||||
* limit 'something'.
|
||||
*/
|
||||
/* no more than 50% of tags for async I/O */
|
||||
bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U);
|
||||
/*
|
||||
* no more than 75% of tags for sync writes (25% extra tags
|
||||
* w.r.t. async I/O, to prevent async I/O from starving sync
|
||||
* writes)
|
||||
*/
|
||||
bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U);
|
||||
|
||||
/*
|
||||
* In-word depths in case some bfq_queue is being weight-
|
||||
* raised: leaving ~63% of tags for sync reads. This is the
|
||||
* highest percentage for which, in our tests, application
|
||||
* start-up times didn't suffer from any regression due to tag
|
||||
* shortage.
|
||||
*/
|
||||
/* no more than ~18% of tags for async I/O */
|
||||
bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U);
|
||||
/* no more than ~37% of tags for sync writes (~20% extra tags) */
|
||||
bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U);
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
min_shallow = min(min_shallow, bfqd->word_depths[i][j]);
|
||||
|
||||
return min_shallow;
|
||||
}
|
||||
|
||||
static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
|
||||
{
|
||||
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
|
||||
struct blk_mq_tags *tags = hctx->sched_tags;
|
||||
unsigned int min_shallow;
|
||||
|
||||
min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
|
||||
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bfq_exit_queue(struct elevator_queue *e)
|
||||
{
|
||||
struct bfq_data *bfqd = e->elevator_data;
|
||||
|
@ -5242,14 +5256,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
|
|||
bfqd->wr_busy_queues = 0;
|
||||
|
||||
/*
|
||||
* Begin by assuming, optimistically, that the device is a
|
||||
* high-speed one, and that its peak rate is equal to 2/3 of
|
||||
* the highest reference rate.
|
||||
* Begin by assuming, optimistically, that the device peak
|
||||
* rate is equal to 2/3 of the highest reference rate.
|
||||
*/
|
||||
bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
|
||||
T_fast[blk_queue_nonrot(bfqd->queue)];
|
||||
bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
|
||||
bfqd->device_speed = BFQ_BFQD_FAST;
|
||||
bfqd->rate_dur_prod = ref_rate[blk_queue_nonrot(bfqd->queue)] *
|
||||
ref_wr_duration[blk_queue_nonrot(bfqd->queue)];
|
||||
bfqd->peak_rate = ref_rate[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
|
||||
|
||||
spin_lock_init(&bfqd->lock);
|
||||
|
||||
|
@ -5526,6 +5538,7 @@ static struct elevator_type iosched_bfq_mq = {
|
|||
.requests_merged = bfq_requests_merged,
|
||||
.request_merged = bfq_request_merged,
|
||||
.has_work = bfq_has_work,
|
||||
.init_hctx = bfq_init_hctx,
|
||||
.init_sched = bfq_init_queue,
|
||||
.exit_sched = bfq_exit_queue,
|
||||
},
|
||||
|
@ -5556,8 +5569,8 @@ static int __init bfq_init(void)
|
|||
/*
|
||||
* Times to load large popular applications for the typical
|
||||
* systems installed on the reference devices (see the
|
||||
* comments before the definitions of the next two
|
||||
* arrays). Actually, we use slightly slower values, as the
|
||||
* comments before the definition of the next
|
||||
* array). Actually, we use slightly lower values, as the
|
||||
* estimated peak rate tends to be smaller than the actual
|
||||
* peak rate. The reason for this last fact is that estimates
|
||||
* are computed over much shorter time intervals than the long
|
||||
|
@ -5566,25 +5579,8 @@ static int __init bfq_init(void)
|
|||
* scheduler cannot rely on a peak-rate-evaluation workload to
|
||||
* be run for a long time.
|
||||
*/
|
||||
T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
|
||||
T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
|
||||
T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
|
||||
T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
|
||||
|
||||
/*
|
||||
* Thresholds that determine the switch between speed classes
|
||||
* (see the comments before the definition of the array
|
||||
* device_speed_thresh). These thresholds are biased towards
|
||||
* transitions to the fast class. This is safer than the
|
||||
* opposite bias. In fact, a wrong transition to the slow
|
||||
* class results in short weight-raising periods, because the
|
||||
* speed of the device then tends to be higher that the
|
||||
* reference peak rate. On the opposite end, a wrong
|
||||
* transition to the fast class tends to increase
|
||||
* weight-raising periods, because of the opposite reason.
|
||||
*/
|
||||
device_speed_thresh[0] = (4 * R_slow[0]) / 3;
|
||||
device_speed_thresh[1] = (4 * R_slow[1]) / 3;
|
||||
ref_wr_duration[0] = msecs_to_jiffies(7000); /* actually 8 sec */
|
||||
ref_wr_duration[1] = msecs_to_jiffies(2500); /* actually 3 sec */
|
||||
|
||||
ret = elv_register(&iosched_bfq_mq);
|
||||
if (ret)
|
||||
|
|
|
@ -399,11 +399,6 @@ struct bfq_io_cq {
|
|||
struct bfq_ttime saved_ttime;
|
||||
};
|
||||
|
||||
enum bfq_device_speed {
|
||||
BFQ_BFQD_FAST,
|
||||
BFQ_BFQD_SLOW,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct bfq_data - per-device data structure.
|
||||
*
|
||||
|
@ -611,12 +606,11 @@ struct bfq_data {
|
|||
/* Max service-rate for a soft real-time queue, in sectors/sec */
|
||||
unsigned int bfq_wr_max_softrt_rate;
|
||||
/*
|
||||
* Cached value of the product R*T, used for computing the
|
||||
* maximum duration of weight raising automatically.
|
||||
* Cached value of the product ref_rate*ref_wr_duration, used
|
||||
* for computing the maximum duration of weight raising
|
||||
* automatically.
|
||||
*/
|
||||
u64 RT_prod;
|
||||
/* device-speed class for the low-latency heuristic */
|
||||
enum bfq_device_speed device_speed;
|
||||
u64 rate_dur_prod;
|
||||
|
||||
/* fallback dummy bfqq for extreme OOM conditions */
|
||||
struct bfq_queue oom_bfqq;
|
||||
|
@ -635,12 +629,6 @@ struct bfq_data {
|
|||
/* bfqq associated with the task issuing current bio for merging */
|
||||
struct bfq_queue *bio_bfqq;
|
||||
|
||||
/*
|
||||
* Cached sbitmap shift, used to compute depth limits in
|
||||
* bfq_update_depths.
|
||||
*/
|
||||
unsigned int sb_shift;
|
||||
|
||||
/*
|
||||
* Depth limits used in bfq_limit_depth (see comments on the
|
||||
* function)
|
||||
|
@ -732,9 +720,9 @@ struct bfqg_stats {
|
|||
/* total time with empty current active q with other requests queued */
|
||||
struct blkg_stat empty_time;
|
||||
/* fields after this shouldn't be cleared on stat reset */
|
||||
uint64_t start_group_wait_time;
|
||||
uint64_t start_idle_time;
|
||||
uint64_t start_empty_time;
|
||||
u64 start_group_wait_time;
|
||||
u64 start_idle_time;
|
||||
u64 start_empty_time;
|
||||
uint16_t flags;
|
||||
#endif /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
|
||||
};
|
||||
|
@ -856,8 +844,8 @@ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
|
|||
unsigned int op);
|
||||
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
|
||||
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
|
||||
void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
|
||||
uint64_t io_start_time, unsigned int op);
|
||||
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
|
||||
u64 io_start_time_ns, unsigned int op);
|
||||
void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
|
||||
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
|
||||
void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
|
||||
|
|
|
@ -56,12 +56,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
|||
struct bio_set *bs = bio->bi_pool;
|
||||
unsigned inline_vecs;
|
||||
|
||||
if (!bs || !bs->bio_integrity_pool) {
|
||||
if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
|
||||
bip = kmalloc(sizeof(struct bio_integrity_payload) +
|
||||
sizeof(struct bio_vec) * nr_vecs, gfp_mask);
|
||||
inline_vecs = nr_vecs;
|
||||
} else {
|
||||
bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
|
||||
bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
|
||||
inline_vecs = BIP_INLINE_VECS;
|
||||
}
|
||||
|
||||
|
@ -74,7 +74,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
|||
unsigned long idx = 0;
|
||||
|
||||
bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
|
||||
bs->bvec_integrity_pool);
|
||||
&bs->bvec_integrity_pool);
|
||||
if (!bip->bip_vec)
|
||||
goto err;
|
||||
bip->bip_max_vcnt = bvec_nr_vecs(idx);
|
||||
|
@ -90,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
|||
|
||||
return bip;
|
||||
err:
|
||||
mempool_free(bip, bs->bio_integrity_pool);
|
||||
mempool_free(bip, &bs->bio_integrity_pool);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_integrity_alloc);
|
||||
|
@ -111,10 +111,10 @@ static void bio_integrity_free(struct bio *bio)
|
|||
kfree(page_address(bip->bip_vec->bv_page) +
|
||||
bip->bip_vec->bv_offset);
|
||||
|
||||
if (bs && bs->bio_integrity_pool) {
|
||||
bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
|
||||
if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
|
||||
bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
|
||||
|
||||
mempool_free(bip, bs->bio_integrity_pool);
|
||||
mempool_free(bip, &bs->bio_integrity_pool);
|
||||
} else {
|
||||
kfree(bip);
|
||||
}
|
||||
|
@ -465,16 +465,15 @@ EXPORT_SYMBOL(bio_integrity_clone);
|
|||
|
||||
int bioset_integrity_create(struct bio_set *bs, int pool_size)
|
||||
{
|
||||
if (bs->bio_integrity_pool)
|
||||
if (mempool_initialized(&bs->bio_integrity_pool))
|
||||
return 0;
|
||||
|
||||
bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
|
||||
if (!bs->bio_integrity_pool)
|
||||
if (mempool_init_slab_pool(&bs->bio_integrity_pool,
|
||||
pool_size, bip_slab))
|
||||
return -1;
|
||||
|
||||
bs->bvec_integrity_pool = biovec_create_pool(pool_size);
|
||||
if (!bs->bvec_integrity_pool) {
|
||||
mempool_destroy(bs->bio_integrity_pool);
|
||||
if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
|
||||
mempool_exit(&bs->bio_integrity_pool);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -484,8 +483,8 @@ EXPORT_SYMBOL(bioset_integrity_create);
|
|||
|
||||
void bioset_integrity_free(struct bio_set *bs)
|
||||
{
|
||||
mempool_destroy(bs->bio_integrity_pool);
|
||||
mempool_destroy(bs->bvec_integrity_pool);
|
||||
mempool_exit(&bs->bio_integrity_pool);
|
||||
mempool_exit(&bs->bvec_integrity_pool);
|
||||
}
|
||||
EXPORT_SYMBOL(bioset_integrity_free);
|
||||
|
block/bio.c (189 changed lines)
@ -53,7 +53,7 @@ static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
|
|||
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
|
||||
* IO code that does not need private memory pools.
|
||||
*/
|
||||
struct bio_set *fs_bio_set;
|
||||
struct bio_set fs_bio_set;
|
||||
EXPORT_SYMBOL(fs_bio_set);
|
||||
|
||||
/*
|
||||
|
@ -254,7 +254,7 @@ static void bio_free(struct bio *bio)
|
|||
bio_uninit(bio);
|
||||
|
||||
if (bs) {
|
||||
bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
|
||||
bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
|
||||
|
||||
/*
|
||||
* If we have front padding, adjust the bio pointer before freeing
|
||||
|
@ -262,7 +262,7 @@ static void bio_free(struct bio *bio)
|
|||
p = bio;
|
||||
p -= bs->front_pad;
|
||||
|
||||
mempool_free(p, bs->bio_pool);
|
||||
mempool_free(p, &bs->bio_pool);
|
||||
} else {
|
||||
/* Bio was allocated by bio_kmalloc() */
|
||||
kfree(bio);
|
||||
|
@ -454,7 +454,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
|||
inline_vecs = nr_iovecs;
|
||||
} else {
|
||||
/* should not use nobvec bioset for nr_iovecs > 0 */
|
||||
if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
|
||||
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
|
||||
nr_iovecs > 0))
|
||||
return NULL;
|
||||
/*
|
||||
* generic_make_request() converts recursion to iteration; this
|
||||
|
@ -483,11 +484,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
|||
bs->rescue_workqueue)
|
||||
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
|
||||
|
||||
p = mempool_alloc(bs->bio_pool, gfp_mask);
|
||||
p = mempool_alloc(&bs->bio_pool, gfp_mask);
|
||||
if (!p && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
p = mempool_alloc(bs->bio_pool, gfp_mask);
|
||||
p = mempool_alloc(&bs->bio_pool, gfp_mask);
|
||||
}
|
||||
|
||||
front_pad = bs->front_pad;
|
||||
|
@ -503,11 +504,11 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
|||
if (nr_iovecs > inline_vecs) {
|
||||
unsigned long idx = 0;
|
||||
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
|
||||
if (!bvl && gfp_mask != saved_gfp) {
|
||||
punt_bios_to_rescuer(bs);
|
||||
gfp_mask = saved_gfp;
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
|
||||
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
|
||||
}
|
||||
|
||||
if (unlikely(!bvl))
|
||||
|
@ -524,25 +525,25 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
|
|||
return bio;
|
||||
|
||||
err_free:
|
||||
mempool_free(p, bs->bio_pool);
|
||||
mempool_free(p, &bs->bio_pool);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_alloc_bioset);
|
||||
|
||||
void zero_fill_bio(struct bio *bio)
|
||||
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct bio_vec bv;
|
||||
struct bvec_iter iter;
|
||||
|
||||
bio_for_each_segment(bv, bio, iter) {
|
||||
__bio_for_each_segment(bv, bio, iter, start) {
|
||||
char *data = bvec_kmap_irq(&bv, &flags);
|
||||
memset(data, 0, bv.bv_len);
|
||||
flush_dcache_page(bv.bv_page);
|
||||
bvec_kunmap_irq(data, &flags);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(zero_fill_bio);
|
||||
EXPORT_SYMBOL(zero_fill_bio_iter);
|
||||
|
||||
/**
|
||||
* bio_put - release a reference to a bio
|
||||
|
@ -970,27 +971,68 @@ void bio_advance(struct bio *bio, unsigned bytes)
|
|||
}
|
||||
EXPORT_SYMBOL(bio_advance);
|
||||
|
||||
void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
|
||||
struct bio *src, struct bvec_iter *src_iter)
|
||||
{
|
||||
struct bio_vec src_bv, dst_bv;
|
||||
void *src_p, *dst_p;
|
||||
unsigned bytes;
|
||||
|
||||
while (src_iter->bi_size && dst_iter->bi_size) {
|
||||
src_bv = bio_iter_iovec(src, *src_iter);
|
||||
dst_bv = bio_iter_iovec(dst, *dst_iter);
|
||||
|
||||
bytes = min(src_bv.bv_len, dst_bv.bv_len);
|
||||
|
||||
        src_p = kmap_atomic(src_bv.bv_page);
        dst_p = kmap_atomic(dst_bv.bv_page);

        memcpy(dst_p + dst_bv.bv_offset,
               src_p + src_bv.bv_offset,
               bytes);

        kunmap_atomic(dst_p);
        kunmap_atomic(src_p);

        flush_dcache_page(dst_bv.bv_page);

        bio_advance_iter(src, src_iter, bytes);
        bio_advance_iter(dst, dst_iter, bytes);
    }
}
EXPORT_SYMBOL(bio_copy_data_iter);

/**
 * bio_copy_data - copy contents of data buffers from one chain of bios to
 *	another
 * @src: source bio list
 * @dst: destination bio list
 *
 * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
 * @src and @dst as linked lists of bios.
 * bio_copy_data - copy contents of data buffers from one bio to another
 * @src: source bio
 * @dst: destination bio
 *
 * Stops when it reaches the end of either @src or @dst - that is, copies
 * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
 */
void bio_copy_data(struct bio *dst, struct bio *src)
{
    struct bvec_iter src_iter, dst_iter;
    struct bio_vec src_bv, dst_bv;
    void *src_p, *dst_p;
    unsigned bytes;
    struct bvec_iter src_iter = src->bi_iter;
    struct bvec_iter dst_iter = dst->bi_iter;

    src_iter = src->bi_iter;
    dst_iter = dst->bi_iter;
    bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
}
EXPORT_SYMBOL(bio_copy_data);

/**
 * bio_list_copy_data - copy contents of data buffers from one chain of bios to
 *	another
 * @src: source bio list
 * @dst: destination bio list
 *
 * Stops when it reaches the end of either the @src list or @dst list - that is,
 * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
 * bios).
 */
void bio_list_copy_data(struct bio *dst, struct bio *src)
{
    struct bvec_iter src_iter = src->bi_iter;
    struct bvec_iter dst_iter = dst->bi_iter;

    while (1) {
        if (!src_iter.bi_size) {

@@ -1009,26 +1051,10 @@ void bio_copy_data(struct bio *dst, struct bio *src)
            dst_iter = dst->bi_iter;
        }

        src_bv = bio_iter_iovec(src, src_iter);
        dst_bv = bio_iter_iovec(dst, dst_iter);

        bytes = min(src_bv.bv_len, dst_bv.bv_len);

        src_p = kmap_atomic(src_bv.bv_page);
        dst_p = kmap_atomic(dst_bv.bv_page);

        memcpy(dst_p + dst_bv.bv_offset,
               src_p + src_bv.bv_offset,
               bytes);

        kunmap_atomic(dst_p);
        kunmap_atomic(src_p);

        bio_advance_iter(src, &src_iter, bytes);
        bio_advance_iter(dst, &dst_iter, bytes);
        bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
    }
}
EXPORT_SYMBOL(bio_copy_data);
EXPORT_SYMBOL(bio_list_copy_data);

struct bio_map_data {
    int is_our_pages;

@@ -1584,6 +1610,7 @@ void bio_set_pages_dirty(struct bio *bio)
            set_page_dirty_lock(page);
    }
}
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);

static void bio_release_pages(struct bio *bio)
{

@@ -1667,6 +1694,7 @@ void bio_check_pages_dirty(struct bio *bio)
        bio_put(bio);
    }
}
EXPORT_SYMBOL_GPL(bio_check_pages_dirty);

void generic_start_io_acct(struct request_queue *q, int rw,
               unsigned long sectors, struct hd_struct *part)

@@ -1749,6 +1777,9 @@ again:
    if (!bio_integrity_endio(bio))
        return;

    if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
        bio->bi_next = NULL;

    /*
     * Need to have a real endio function for chained bios, otherwise
     * various corner cases will break (like stacking block devices that

@@ -1848,30 +1879,38 @@ EXPORT_SYMBOL_GPL(bio_trim);
 * create memory pools for biovec's in a bio_set.
 * use the global biovec slabs created for general use.
 */
mempool_t *biovec_create_pool(int pool_entries)
int biovec_init_pool(mempool_t *pool, int pool_entries)
{
    struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;

    return mempool_create_slab_pool(pool_entries, bp->slab);
    return mempool_init_slab_pool(pool, pool_entries, bp->slab);
}

void bioset_free(struct bio_set *bs)
/*
 * bioset_exit - exit a bioset initialized with bioset_init()
 *
 * May be called on a zeroed but uninitialized bioset (i.e. allocated with
 * kzalloc()).
 */
void bioset_exit(struct bio_set *bs)
{
    if (bs->rescue_workqueue)
        destroy_workqueue(bs->rescue_workqueue);
    bs->rescue_workqueue = NULL;

    mempool_destroy(bs->bio_pool);
    mempool_destroy(bs->bvec_pool);
    mempool_exit(&bs->bio_pool);
    mempool_exit(&bs->bvec_pool);

    bioset_integrity_free(bs);
    bio_put_slab(bs);

    kfree(bs);
    if (bs->bio_slab)
        bio_put_slab(bs);
    bs->bio_slab = NULL;
}
EXPORT_SYMBOL(bioset_free);
EXPORT_SYMBOL(bioset_exit);

/**
 * bioset_create - Create a bio_set
 * bioset_init - Initialize a bio_set
 * @bs: pool to initialize
 * @pool_size: Number of bio and bio_vecs to cache in the mempool
 * @front_pad: Number of bytes to allocate in front of the returned bio
 * @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS

@@ -1890,16 +1929,12 @@ EXPORT_SYMBOL(bioset_free);
 *    dispatch queued requests when the mempool runs out of space.
 *
 */
struct bio_set *bioset_create(unsigned int pool_size,
                  unsigned int front_pad,
                  int flags)
int bioset_init(struct bio_set *bs,
        unsigned int pool_size,
        unsigned int front_pad,
        int flags)
{
    unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
    struct bio_set *bs;

    bs = kzalloc(sizeof(*bs), GFP_KERNEL);
    if (!bs)
        return NULL;

    bs->front_pad = front_pad;

@@ -1908,34 +1943,29 @@ struct bio_set *bioset_create(unsigned int pool_size,
    INIT_WORK(&bs->rescue_work, bio_alloc_rescue);

    bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
    if (!bs->bio_slab) {
        kfree(bs);
        return NULL;
    }
    if (!bs->bio_slab)
        return -ENOMEM;

    bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
    if (!bs->bio_pool)
    if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
        goto bad;

    if (flags & BIOSET_NEED_BVECS) {
        bs->bvec_pool = biovec_create_pool(pool_size);
        if (!bs->bvec_pool)
            goto bad;
    }
    if ((flags & BIOSET_NEED_BVECS) &&
        biovec_init_pool(&bs->bvec_pool, pool_size))
        goto bad;

    if (!(flags & BIOSET_NEED_RESCUER))
        return bs;
        return 0;

    bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
    if (!bs->rescue_workqueue)
        goto bad;

    return bs;
    return 0;
bad:
    bioset_free(bs);
    return NULL;
    bioset_exit(bs);
    return -ENOMEM;
}
EXPORT_SYMBOL(bioset_create);
EXPORT_SYMBOL(bioset_init);

#ifdef CONFIG_BLK_CGROUP

@@ -2020,11 +2050,10 @@ static int __init init_bio(void)
    bio_integrity_init();
    biovec_init_slabs();

    fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
    if (!fs_bio_set)
    if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
        panic("bio: can't allocate bios\n");

    if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
    if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
        panic("bio: can't create integrity pool\n");

    return 0;
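The bio.c hunks above change the bio_set API from pointer-returning constructors (bioset_create()/bioset_free()) to init/exit helpers that operate on a caller-embedded structure, which is what lets users of bio_sets switch to embedded structs. A minimal sketch of how a hypothetical driver would adopt the new calls follows; the my_dev structure and the pool size are invented for illustration, only bioset_init()/bioset_exit() and BIOSET_NEED_BVECS come from the patches above.

/* Illustrative sketch, not part of this series: embedding a bio_set. */
struct my_dev {
    struct bio_set bs;      /* embedded, no separate allocation */
};

static int my_dev_init(struct my_dev *dev)
{
    /* front_pad = 0; also ask for the bvec mempool */
    return bioset_init(&dev->bs, 16 /* pool_size, illustrative */, 0,
                       BIOSET_NEED_BVECS);
}

static void my_dev_exit(struct my_dev *dev)
{
    /*
     * Safe even if my_dev_init() failed part-way or never ran on a
     * zeroed structure, per the bioset_exit() comment above.
     */
    bioset_exit(&dev->bs);
}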
120	block/blk-core.c
|
@ -196,15 +196,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
|
|||
RB_CLEAR_NODE(&rq->rb_node);
|
||||
rq->tag = -1;
|
||||
rq->internal_tag = -1;
|
||||
rq->start_time = jiffies;
|
||||
set_start_time_ns(rq);
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->part = NULL;
|
||||
seqcount_init(&rq->gstate_seq);
|
||||
u64_stats_init(&rq->aborted_gstate_sync);
|
||||
/*
|
||||
* See comment of blk_mq_init_request
|
||||
*/
|
||||
WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_init);
|
||||
|
||||
|
@ -280,6 +273,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
|
|||
bio_advance(bio, nbytes);
|
||||
|
||||
/* don't actually finish bio if it's part of flush sequence */
|
||||
/*
|
||||
* XXX this code looks suspicious - it's not consistent with advancing
|
||||
* req->bio in caller
|
||||
*/
|
||||
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
@ -360,7 +357,6 @@ EXPORT_SYMBOL(blk_start_queue_async);
|
|||
void blk_start_queue(struct request_queue *q)
|
||||
{
|
||||
lockdep_assert_held(q->queue_lock);
|
||||
WARN_ON(!in_interrupt() && !irqs_disabled());
|
||||
WARN_ON_ONCE(q->mq_ops);
|
||||
|
||||
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
|
||||
|
@ -996,18 +992,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
|
|||
spinlock_t *lock)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
|
||||
q = kmem_cache_alloc_node(blk_requestq_cachep,
|
||||
gfp_mask | __GFP_ZERO, node_id);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
INIT_LIST_HEAD(&q->queue_head);
|
||||
q->last_merge = NULL;
|
||||
q->end_sector = 0;
|
||||
q->boundary_rq = NULL;
|
||||
|
||||
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
|
||||
if (q->id < 0)
|
||||
goto fail_q;
|
||||
|
||||
q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
if (!q->bio_split)
|
||||
ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
if (ret)
|
||||
goto fail_id;
|
||||
|
||||
q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
|
||||
|
@ -1079,7 +1081,7 @@ fail_bdi:
|
|||
fail_stats:
|
||||
bdi_put(q->backing_dev_info);
|
||||
fail_split:
|
||||
bioset_free(q->bio_split);
|
||||
bioset_exit(&q->bio_split);
|
||||
fail_id:
|
||||
ida_simple_remove(&blk_queue_ida, q->id);
|
||||
fail_q:
|
||||
|
@ -1173,16 +1175,8 @@ int blk_init_allocated_queue(struct request_queue *q)
|
|||
|
||||
q->sg_reserved_size = INT_MAX;
|
||||
|
||||
/* Protect q->elevator from elevator_change */
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
|
||||
/* init elevator */
|
||||
if (elevator_init(q, NULL)) {
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
if (elevator_init(q))
|
||||
goto out_exit_flush_rq;
|
||||
}
|
||||
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return 0;
|
||||
|
||||
out_exit_flush_rq:
|
||||
|
@ -1334,6 +1328,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
|
|||
* @op: operation and flags
|
||||
* @bio: bio to allocate request for (can be %NULL)
|
||||
* @flags: BLQ_MQ_REQ_* flags
|
||||
* @gfp_mask: allocator flags
|
||||
*
|
||||
* Get a free request from @q. This function may fail under memory
|
||||
* pressure or if @q is dead.
|
||||
|
@ -1343,7 +1338,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
|
|||
* Returns request pointer on success, with @q->queue_lock *not held*.
|
||||
*/
|
||||
static struct request *__get_request(struct request_list *rl, unsigned int op,
|
||||
struct bio *bio, blk_mq_req_flags_t flags)
|
||||
struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask)
|
||||
{
|
||||
struct request_queue *q = rl->q;
|
||||
struct request *rq;
|
||||
|
@ -1352,8 +1347,6 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
|
|||
struct io_cq *icq = NULL;
|
||||
const bool is_sync = op_is_sync(op);
|
||||
int may_queue;
|
||||
gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
|
||||
__GFP_DIRECT_RECLAIM;
|
||||
req_flags_t rq_flags = RQF_ALLOCED;
|
||||
|
||||
lockdep_assert_held(q->queue_lock);
|
||||
|
@ -1517,8 +1510,9 @@ rq_starved:
|
|||
* @op: operation and flags
|
||||
* @bio: bio to allocate request for (can be %NULL)
|
||||
* @flags: BLK_MQ_REQ_* flags.
|
||||
* @gfp: allocator flags
|
||||
*
|
||||
* Get a free request from @q. If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
|
||||
* Get a free request from @q. If %BLK_MQ_REQ_NOWAIT is set in @flags,
|
||||
* this function keeps retrying under memory pressure and fails iff @q is dead.
|
||||
*
|
||||
* Must be called with @q->queue_lock held and,
|
||||
|
@ -1526,7 +1520,7 @@ rq_starved:
|
|||
* Returns request pointer on success, with @q->queue_lock *not held*.
|
||||
*/
|
||||
static struct request *get_request(struct request_queue *q, unsigned int op,
|
||||
struct bio *bio, blk_mq_req_flags_t flags)
|
||||
struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp)
|
||||
{
|
||||
const bool is_sync = op_is_sync(op);
|
||||
DEFINE_WAIT(wait);
|
||||
|
@ -1538,7 +1532,7 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
|
|||
|
||||
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
|
||||
retry:
|
||||
rq = __get_request(rl, op, bio, flags);
|
||||
rq = __get_request(rl, op, bio, flags, gfp);
|
||||
if (!IS_ERR(rq))
|
||||
return rq;
|
||||
|
||||
|
@ -1579,8 +1573,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
|
|||
unsigned int op, blk_mq_req_flags_t flags)
|
||||
{
|
||||
struct request *rq;
|
||||
gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
|
||||
__GFP_DIRECT_RECLAIM;
|
||||
gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO;
|
||||
int ret = 0;
|
||||
|
||||
WARN_ON_ONCE(q->mq_ops);
|
||||
|
@ -1592,7 +1585,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
|
|||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
rq = get_request(q, op, NULL, flags);
|
||||
rq = get_request(q, op, NULL, flags, gfp_mask);
|
||||
if (IS_ERR(rq)) {
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_exit(q);
|
||||
|
@ -1607,13 +1600,13 @@ static struct request *blk_old_get_request(struct request_queue *q,
|
|||
}
|
||||
|
||||
/**
|
||||
* blk_get_request_flags - allocate a request
|
||||
* blk_get_request - allocate a request
|
||||
* @q: request queue to allocate a request for
|
||||
* @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
|
||||
* @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
|
||||
*/
|
||||
struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
|
||||
blk_mq_req_flags_t flags)
|
||||
struct request *blk_get_request(struct request_queue *q, unsigned int op,
|
||||
blk_mq_req_flags_t flags)
|
||||
{
|
||||
struct request *req;
|
||||
|
||||
|
@ -1632,14 +1625,6 @@ struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
|
|||
|
||||
return req;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_get_request_flags);
|
||||
|
||||
struct request *blk_get_request(struct request_queue *q, unsigned int op,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
|
||||
0 : BLK_MQ_REQ_NOWAIT);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_get_request);
|
||||
|
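With blk_get_request_flags() folded into blk_get_request(), callers now pass BLK_MQ_REQ_* flags instead of a gfp mask, and the legacy path derives the allocation mask internally (GFP_ATOMIC for BLK_MQ_REQ_NOWAIT, GFP_NOIO otherwise), as the __get_request()/blk_old_get_request() hunks above show. A hedged sketch of a caller after this change; the function name is hypothetical and only the blk_get_request() signature comes from the diff.

/* Illustrative sketch, not part of this series. */
static int my_submit_passthrough(struct request_queue *q)
{
    struct request *rq;

    /* blocking allocation: no flags, the legacy path now uses GFP_NOIO */
    rq = blk_get_request(q, REQ_OP_DRV_OUT, 0);
    if (IS_ERR(rq))
        return PTR_ERR(rq);

    /* ... set up and issue rq; for a non-blocking attempt pass
     * BLK_MQ_REQ_NOWAIT instead of 0 ... */

    blk_put_request(rq);
    return 0;
}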
||||
/**
|
||||
|
@ -1660,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
|
|||
blk_delete_timer(rq);
|
||||
blk_clear_rq_complete(rq);
|
||||
trace_block_rq_requeue(q, rq);
|
||||
wbt_requeue(q->rq_wb, &rq->issue_stat);
|
||||
wbt_requeue(q->rq_wb, rq);
|
||||
|
||||
if (rq->rq_flags & RQF_QUEUED)
|
||||
blk_queue_end_tag(q, rq);
|
||||
|
@ -1767,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
|
|||
/* this is a bio leak */
|
||||
WARN_ON(req->bio != NULL);
|
||||
|
||||
wbt_done(q->rq_wb, &req->issue_stat);
|
||||
wbt_done(q->rq_wb, req);
|
||||
|
||||
/*
|
||||
* Request may not have originated from ll_rw_blk. if not,
|
||||
|
@ -2066,7 +2051,7 @@ get_rq:
|
|||
* Returns with the queue unlocked.
|
||||
*/
|
||||
blk_queue_enter_live(q);
|
||||
req = get_request(q, bio->bi_opf, bio, 0);
|
||||
req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
|
||||
if (IS_ERR(req)) {
|
||||
blk_queue_exit(q);
|
||||
__wbt_done(q->rq_wb, wb_acct);
|
||||
|
@ -2078,7 +2063,7 @@ get_rq:
|
|||
goto out_unlock;
|
||||
}
|
||||
|
||||
wbt_track(&req->issue_stat, wb_acct);
|
||||
wbt_track(req, wb_acct);
|
||||
|
||||
/*
|
||||
* After dropping the lock and possibly sleeping here, our request
|
||||
|
@ -2392,7 +2377,9 @@ blk_qc_t generic_make_request(struct bio *bio)
|
|||
|
||||
if (bio->bi_opf & REQ_NOWAIT)
|
||||
flags = BLK_MQ_REQ_NOWAIT;
|
||||
if (blk_queue_enter(q, flags) < 0) {
|
||||
if (bio_flagged(bio, BIO_QUEUE_ENTERED))
|
||||
blk_queue_enter_live(q);
|
||||
else if (blk_queue_enter(q, flags) < 0) {
|
||||
if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
|
||||
bio_wouldblock_error(bio);
|
||||
else
|
||||
|
@ -2727,7 +2714,7 @@ void blk_account_io_completion(struct request *req, unsigned int bytes)
|
|||
}
|
||||
}
|
||||
|
||||
void blk_account_io_done(struct request *req)
|
||||
void blk_account_io_done(struct request *req, u64 now)
|
||||
{
|
||||
/*
|
||||
* Account IO completion. flush_rq isn't accounted as a
|
||||
|
@ -2735,11 +2722,12 @@ void blk_account_io_done(struct request *req)
|
|||
* containing request is enough.
|
||||
*/
|
||||
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
|
||||
unsigned long duration = jiffies - req->start_time;
|
||||
unsigned long duration;
|
||||
const int rw = rq_data_dir(req);
|
||||
struct hd_struct *part;
|
||||
int cpu;
|
||||
|
||||
duration = nsecs_to_jiffies(now - req->start_time_ns);
|
||||
cpu = part_stat_lock();
|
||||
part = req->part;
|
||||
|
||||
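The accounting switch above moves from jiffies stamps to ktime_get_ns() stamps, so the completion path derives the jiffies-based duration from two nanosecond values via nsecs_to_jiffies(). The arithmetic is roughly a division by the tick period; a userspace-style sketch with an assumed HZ, not the kernel implementation:

/* Illustrative sketch, not the kernel's nsecs_to_jiffies(). */
#define NSEC_PER_SEC    1000000000ULL
#define HZ_ASSUMED      250ULL          /* assumed tick rate */

static unsigned long duration_in_jiffies(unsigned long long start_ns,
                                         unsigned long long now_ns)
{
    unsigned long long delta = now_ns - start_ns;

    return (unsigned long)(delta / (NSEC_PER_SEC / HZ_ASSUMED));
}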
|
@ -2970,10 +2958,8 @@ static void blk_dequeue_request(struct request *rq)
|
|||
* and to it is freed is accounted as io that is in progress at
|
||||
* the driver side.
|
||||
*/
|
||||
if (blk_account_rq(rq)) {
|
||||
if (blk_account_rq(rq))
|
||||
q->in_flight[rq_is_sync(rq)]++;
|
||||
set_io_start_time_ns(rq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2992,9 +2978,12 @@ void blk_start_request(struct request *req)
|
|||
blk_dequeue_request(req);
|
||||
|
||||
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
|
||||
blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
|
||||
req->io_start_time_ns = ktime_get_ns();
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
req->throtl_size = blk_rq_sectors(req);
|
||||
#endif
|
||||
req->rq_flags |= RQF_STATS;
|
||||
wbt_issue(req->q->rq_wb, &req->issue_stat);
|
||||
wbt_issue(req->q->rq_wb, req);
|
||||
}
|
||||
|
||||
BUG_ON(blk_rq_is_complete(req));
|
||||
|
@ -3092,8 +3081,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
|
|||
struct bio *bio = req->bio;
|
||||
unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
|
||||
|
||||
if (bio_bytes == bio->bi_iter.bi_size)
|
||||
if (bio_bytes == bio->bi_iter.bi_size) {
|
||||
req->bio = bio->bi_next;
|
||||
bio->bi_next = NULL;
|
||||
}
|
||||
|
||||
/* Completion has already been traced */
|
||||
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
|
||||
|
@ -3190,12 +3181,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
|
|||
void blk_finish_request(struct request *req, blk_status_t error)
|
||||
{
|
||||
struct request_queue *q = req->q;
|
||||
u64 now = ktime_get_ns();
|
||||
|
||||
lockdep_assert_held(req->q->queue_lock);
|
||||
WARN_ON_ONCE(q->mq_ops);
|
||||
|
||||
if (req->rq_flags & RQF_STATS)
|
||||
blk_stat_add(req);
|
||||
blk_stat_add(req, now);
|
||||
|
||||
if (req->rq_flags & RQF_QUEUED)
|
||||
blk_queue_end_tag(q, req);
|
||||
|
@ -3210,10 +3202,10 @@ void blk_finish_request(struct request *req, blk_status_t error)
|
|||
if (req->rq_flags & RQF_DONTPREP)
|
||||
blk_unprep_request(req);
|
||||
|
||||
blk_account_io_done(req);
|
||||
blk_account_io_done(req, now);
|
||||
|
||||
if (req->end_io) {
|
||||
wbt_done(req->q->rq_wb, &req->issue_stat);
|
||||
wbt_done(req->q->rq_wb, req);
|
||||
req->end_io(req, error);
|
||||
} else {
|
||||
if (blk_bidi_rq(req))
|
||||
|
@ -3519,7 +3511,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
|
|||
struct bio *bio, *bio_src;
|
||||
|
||||
if (!bs)
|
||||
bs = fs_bio_set;
|
||||
bs = &fs_bio_set;
|
||||
|
||||
__rq_for_each_bio(bio_src, rq_src) {
|
||||
bio = bio_clone_fast(bio_src, gfp_mask, bs);
|
||||
|
@ -3630,7 +3622,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
|
|||
blk_run_queue_async(q);
|
||||
else
|
||||
__blk_run_queue(q);
|
||||
spin_unlock(q->queue_lock);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
|
||||
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
|
||||
|
@ -3678,7 +3670,6 @@ EXPORT_SYMBOL(blk_check_plugged);
|
|||
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
{
|
||||
struct request_queue *q;
|
||||
unsigned long flags;
|
||||
struct request *rq;
|
||||
LIST_HEAD(list);
|
||||
unsigned int depth;
|
||||
|
@ -3698,11 +3689,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
|||
q = NULL;
|
||||
depth = 0;
|
||||
|
||||
/*
|
||||
* Save and disable interrupts here, to avoid doing it for every
|
||||
* queue lock we have to take.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
while (!list_empty(&list)) {
|
||||
rq = list_entry_rq(list.next);
|
||||
list_del_init(&rq->queuelist);
|
||||
|
@ -3715,7 +3701,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
|||
queue_unplugged(q, depth, from_schedule);
|
||||
q = rq->q;
|
||||
depth = 0;
|
||||
spin_lock(q->queue_lock);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3742,8 +3728,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
|||
*/
|
||||
if (q)
|
||||
queue_unplugged(q, depth, from_schedule);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void blk_finish_plug(struct blk_plug *plug)
|
||||
|
|
|
@ -333,34 +333,34 @@ static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
|
|||
}
|
||||
|
||||
static struct integrity_sysfs_entry integrity_format_entry = {
|
||||
.attr = { .name = "format", .mode = S_IRUGO },
|
||||
.attr = { .name = "format", .mode = 0444 },
|
||||
.show = integrity_format_show,
|
||||
};
|
||||
|
||||
static struct integrity_sysfs_entry integrity_tag_size_entry = {
|
||||
.attr = { .name = "tag_size", .mode = S_IRUGO },
|
||||
.attr = { .name = "tag_size", .mode = 0444 },
|
||||
.show = integrity_tag_size_show,
|
||||
};
|
||||
|
||||
static struct integrity_sysfs_entry integrity_interval_entry = {
|
||||
.attr = { .name = "protection_interval_bytes", .mode = S_IRUGO },
|
||||
.attr = { .name = "protection_interval_bytes", .mode = 0444 },
|
||||
.show = integrity_interval_show,
|
||||
};
|
||||
|
||||
static struct integrity_sysfs_entry integrity_verify_entry = {
|
||||
.attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
|
||||
.attr = { .name = "read_verify", .mode = 0644 },
|
||||
.show = integrity_verify_show,
|
||||
.store = integrity_verify_store,
|
||||
};
|
||||
|
||||
static struct integrity_sysfs_entry integrity_generate_entry = {
|
||||
.attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
|
||||
.attr = { .name = "write_generate", .mode = 0644 },
|
||||
.show = integrity_generate_show,
|
||||
.store = integrity_generate_store,
|
||||
};
|
||||
|
||||
static struct integrity_sysfs_entry integrity_device_entry = {
|
||||
.attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
|
||||
.attr = { .name = "device_is_integrity_capable", .mode = 0444 },
|
||||
.show = integrity_device_show,
|
||||
};
|
||||
|
||||
|
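The sysfs attribute hunks here (and the similar ones later in blk-mq-sysfs.c and blk-sysfs.c) are the mechanical symbolic-to-octal permission conversion; the numeric values do not change, only the spelling. A small self-contained check of the equivalences used, assuming standard POSIX mode bits:

/* Illustrative userspace check that the conversion is value-preserving. */
#include <sys/stat.h>
#include <assert.h>

int main(void)
{
    /* S_IRUGO is kernel-only, so compose it from the POSIX bits. */
    assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444);            /* S_IRUGO */
    assert((S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR) == 0644);  /* S_IRUGO | S_IWUSR */
    return 0;
}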
|
|
@ -62,10 +62,16 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
|||
unsigned int req_sects;
|
||||
sector_t end_sect, tmp;
|
||||
|
||||
/* Make sure bi_size doesn't overflow */
|
||||
req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
|
||||
/*
|
||||
* Issue in chunks of the user defined max discard setting,
|
||||
* ensuring that bi_size doesn't overflow
|
||||
*/
|
||||
req_sects = min_t(sector_t, nr_sects,
|
||||
q->limits.max_discard_sectors);
|
||||
if (req_sects > UINT_MAX >> 9)
|
||||
req_sects = UINT_MAX >> 9;
|
||||
|
||||
/**
|
||||
/*
|
||||
* If splitting a request, and the next starting sector would be
|
||||
* misaligned, stop the discard at the previous aligned sector.
|
||||
*/
|
||||
|
|
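The __blkdev_issue_discard() hunk above makes the chunk size follow the queue's max_discard_sectors while still keeping each bio's 32-bit byte count from overflowing, so the clamp is just two successive limits. A small userspace sketch of the same arithmetic, with made-up limits:

/* Illustrative sketch of the discard chunk clamping above. */
#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    uint64_t nr_sects = 1ULL << 24;            /* sectors left to discard (illustrative) */
    uint64_t max_discard_sectors = 1ULL << 16; /* q->limits.max_discard_sectors (assumed) */

    /* issue in chunks of the user defined max discard setting... */
    uint64_t req_sects = MIN(nr_sects, max_discard_sectors);

    /* ...but never let bi_size (bytes, 32-bit) overflow */
    if (req_sects > (UINT32_MAX >> 9))
        req_sects = UINT32_MAX >> 9;

    printf("this chunk: %llu sectors\n", (unsigned long long)req_sects);
    return 0;
}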
|
@ -188,16 +188,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
|
|||
switch (bio_op(*bio)) {
|
||||
case REQ_OP_DISCARD:
|
||||
case REQ_OP_SECURE_ERASE:
|
||||
split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
|
||||
split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs);
|
||||
break;
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
|
||||
split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs);
|
||||
break;
|
||||
case REQ_OP_WRITE_SAME:
|
||||
split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
|
||||
split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs);
|
||||
break;
|
||||
default:
|
||||
split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
|
||||
split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -210,6 +210,16 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
|
|||
/* there isn't chance to merge the splitted bio */
|
||||
split->bi_opf |= REQ_NOMERGE;
|
||||
|
||||
/*
|
||||
* Since we're recursing into make_request here, ensure
|
||||
* that we mark this bio as already having entered the queue.
|
||||
* If not, and the queue is going away, we can get stuck
|
||||
* forever on waiting for the queue reference to drop. But
|
||||
* that will never happen, as we're already holding a
|
||||
* reference to it.
|
||||
*/
|
||||
bio_set_flag(*bio, BIO_QUEUE_ENTERED);
|
||||
|
||||
bio_chain(split, *bio);
|
||||
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
|
||||
generic_make_request(*bio);
|
||||
|
@ -724,13 +734,12 @@ static struct request *attempt_merge(struct request_queue *q,
|
|||
}
|
||||
|
||||
/*
|
||||
* At this point we have either done a back merge
|
||||
* or front merge. We need the smaller start_time of
|
||||
* the merged requests to be the current request
|
||||
* for accounting purposes.
|
||||
* At this point we have either done a back merge or front merge. We
|
||||
* need the smaller start_time_ns of the merged requests to be the
|
||||
* current request for accounting purposes.
|
||||
*/
|
||||
if (time_after(req->start_time, next->start_time))
|
||||
req->start_time = next->start_time;
|
||||
if (next->start_time_ns < req->start_time_ns)
|
||||
req->start_time_ns = next->start_time_ns;
|
||||
|
||||
req->biotail->bi_next = next->bio;
|
||||
req->biotail = next->biotail;
|
||||
|
|
|
@ -344,7 +344,6 @@ static const char *const rqf_name[] = {
|
|||
RQF_NAME(STATS),
|
||||
RQF_NAME(SPECIAL_PAYLOAD),
|
||||
RQF_NAME(ZONE_WRITE_LOCKED),
|
||||
RQF_NAME(MQ_TIMEOUT_EXPIRED),
|
||||
RQF_NAME(MQ_POLL_SLEPT),
|
||||
};
|
||||
#undef RQF_NAME
|
||||
|
|
|
@ -268,19 +268,16 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
|
|||
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
|
||||
|
||||
/*
|
||||
* Reverse check our software queue for entries that we could potentially
|
||||
* merge with. Currently includes a hand-wavy stop count of 8, to not spend
|
||||
* too much time checking for merges.
|
||||
* Iterate list of requests and see if we can merge this bio with any
|
||||
* of them.
|
||||
*/
|
||||
static bool blk_mq_attempt_merge(struct request_queue *q,
|
||||
struct blk_mq_ctx *ctx, struct bio *bio)
|
||||
bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct request *rq;
|
||||
int checked = 8;
|
||||
|
||||
lockdep_assert_held(&ctx->lock);
|
||||
|
||||
list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
|
||||
list_for_each_entry_reverse(rq, list, queuelist) {
|
||||
bool merged = false;
|
||||
|
||||
if (!checked--)
|
||||
|
@ -305,13 +302,30 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (merged)
|
||||
ctx->rq_merged++;
|
||||
return merged;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
|
||||
|
||||
/*
|
||||
* Reverse check our software queue for entries that we could potentially
|
||||
* merge with. Currently includes a hand-wavy stop count of 8, to not spend
|
||||
* too much time checking for merges.
|
||||
*/
|
||||
static bool blk_mq_attempt_merge(struct request_queue *q,
|
||||
struct blk_mq_ctx *ctx, struct bio *bio)
|
||||
{
|
||||
lockdep_assert_held(&ctx->lock);
|
||||
|
||||
if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
|
||||
ctx->rq_merged++;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
|
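Splitting the list walk out into blk_mq_bio_list_merge() lets an I/O scheduler (e.g. Kyber) run the same bounded, reverse 8-entry scan over its own pending list instead of only the per-ctx list. A hypothetical scheduler hook might look like the sketch below; the per-hctx data structure, its lock and its list name are invented, only the blk_mq_bio_list_merge() signature comes from the hunk above.

/* Illustrative sketch, not code from this series. */
static bool my_sched_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
{
    struct my_sched_hctx_data *d = hctx->sched_data;   /* hypothetical */
    bool merged;

    spin_lock(&d->lock);                               /* hypothetical lock */
    merged = blk_mq_bio_list_merge(hctx->queue, &d->rq_list, bio);
    spin_unlock(&d->lock);

    return merged;
}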
@ -571,6 +585,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
|
|||
|
||||
if (!e) {
|
||||
q->elevator = NULL;
|
||||
q->nr_requests = q->tag_set->queue_depth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -633,14 +648,3 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
|
|||
blk_mq_sched_tags_teardown(q);
|
||||
q->elevator = NULL;
|
||||
}
|
||||
|
||||
int blk_mq_sched_init(struct request_queue *q)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
ret = elevator_init(q, NULL);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -33,8 +33,6 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
|||
void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx);
|
||||
|
||||
int blk_mq_sched_init(struct request_queue *q);
|
||||
|
||||
static inline bool
|
||||
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
|
|
|
@ -166,15 +166,15 @@ static struct attribute *default_ctx_attrs[] = {
|
|||
};
|
||||
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
|
||||
.attr = {.name = "nr_tags", .mode = S_IRUGO },
|
||||
.attr = {.name = "nr_tags", .mode = 0444 },
|
||||
.show = blk_mq_hw_sysfs_nr_tags_show,
|
||||
};
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
|
||||
.attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
|
||||
.attr = {.name = "nr_reserved_tags", .mode = 0444 },
|
||||
.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
|
||||
};
|
||||
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
|
||||
.attr = {.name = "cpu_list", .mode = S_IRUGO },
|
||||
.attr = {.name = "cpu_list", .mode = 0444 },
|
||||
.show = blk_mq_hw_sysfs_cpus_show,
|
||||
};
|
||||
|
||||
|
|
|
@ -134,6 +134,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
|||
ws = bt_wait_ptr(bt, data->hctx);
|
||||
drop_ctx = data->ctx == NULL;
|
||||
do {
|
||||
struct sbitmap_queue *bt_prev;
|
||||
|
||||
/*
|
||||
* We're out of tags on this hardware queue, kick any
|
||||
* pending IO submits before going to sleep waiting for
|
||||
|
@ -159,6 +161,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
|||
if (data->ctx)
|
||||
blk_mq_put_ctx(data->ctx);
|
||||
|
||||
bt_prev = bt;
|
||||
io_schedule();
|
||||
|
||||
data->ctx = blk_mq_get_ctx(data->q);
|
||||
|
@ -170,6 +173,15 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
|
|||
bt = &tags->bitmap_tags;
|
||||
|
||||
finish_wait(&ws->wait, &wait);
|
||||
|
||||
/*
|
||||
* If destination hw queue is changed, fake wake up on
|
||||
* previous queue for compensating the wake up miss, so
|
||||
* other allocations on previous queue won't be starved.
|
||||
*/
|
||||
if (bt != bt_prev)
|
||||
sbitmap_queue_wake_up(bt_prev);
|
||||
|
||||
ws = bt_wait_ptr(bt, data->hctx);
|
||||
} while (1);
|
||||
|
||||
|
@ -259,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
|||
* test and set the bit before assining ->rqs[].
|
||||
*/
|
||||
rq = tags->rqs[bitnr];
|
||||
if (rq)
|
||||
if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
|
||||
iter_data->fn(rq, iter_data->data, reserved);
|
||||
|
||||
return true;
|
||||
|
|
344	block/blk-mq.c
|
@ -309,7 +309,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
|||
RB_CLEAR_NODE(&rq->rb_node);
|
||||
rq->rq_disk = NULL;
|
||||
rq->part = NULL;
|
||||
rq->start_time = jiffies;
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->io_start_time_ns = 0;
|
||||
rq->nr_phys_segments = 0;
|
||||
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
||||
rq->nr_integrity_segments = 0;
|
||||
|
@ -328,11 +329,10 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
|||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
rq->rl = NULL;
|
||||
set_start_time_ns(rq);
|
||||
rq->io_start_time_ns = 0;
|
||||
#endif
|
||||
|
||||
data->ctx->rq_dispatched[op_is_sync(op)]++;
|
||||
refcount_set(&rq->ref, 1);
|
||||
return rq;
|
||||
}
|
||||
|
||||
|
@ -361,9 +361,11 @@ static struct request *blk_mq_get_request(struct request_queue *q,
|
|||
|
||||
/*
|
||||
* Flush requests are special and go directly to the
|
||||
* dispatch list.
|
||||
* dispatch list. Don't include reserved tags in the
|
||||
* limiting, as it isn't useful.
|
||||
*/
|
||||
if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
|
||||
if (!op_is_flush(op) && e->type->ops.mq.limit_depth &&
|
||||
!(data->flags & BLK_MQ_REQ_RESERVED))
|
||||
e->type->ops.mq.limit_depth(op, data);
|
||||
}
|
||||
|
||||
|
@ -464,13 +466,27 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
|
||||
|
||||
static void __blk_mq_free_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
const int sched_tag = rq->internal_tag;
|
||||
|
||||
if (rq->tag != -1)
|
||||
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
|
||||
if (sched_tag != -1)
|
||||
blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
|
||||
blk_mq_sched_restart(hctx);
|
||||
blk_queue_exit(q);
|
||||
}
|
||||
|
||||
void blk_mq_free_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct elevator_queue *e = q->elevator;
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
const int sched_tag = rq->internal_tag;
|
||||
|
||||
if (rq->rq_flags & RQF_ELVPRIV) {
|
||||
if (e && e->type->ops.mq.finish_request)
|
||||
|
@ -488,27 +504,30 @@ void blk_mq_free_request(struct request *rq)
|
|||
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
|
||||
laptop_io_completion(q->backing_dev_info);
|
||||
|
||||
wbt_done(q->rq_wb, &rq->issue_stat);
|
||||
wbt_done(q->rq_wb, rq);
|
||||
|
||||
if (blk_rq_rl(rq))
|
||||
blk_put_rl(blk_rq_rl(rq));
|
||||
|
||||
blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
|
||||
if (rq->tag != -1)
|
||||
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
|
||||
if (sched_tag != -1)
|
||||
blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
|
||||
blk_mq_sched_restart(hctx);
|
||||
blk_queue_exit(q);
|
||||
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
|
||||
if (refcount_dec_and_test(&rq->ref))
|
||||
__blk_mq_free_request(rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_free_request);
|
||||
|
||||
inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
|
||||
{
|
||||
blk_account_io_done(rq);
|
||||
u64 now = ktime_get_ns();
|
||||
|
||||
if (rq->rq_flags & RQF_STATS) {
|
||||
blk_mq_poll_stats_start(rq->q);
|
||||
blk_stat_add(rq, now);
|
||||
}
|
||||
|
||||
blk_account_io_done(rq, now);
|
||||
|
||||
if (rq->end_io) {
|
||||
wbt_done(rq->q->rq_wb, &rq->issue_stat);
|
||||
wbt_done(rq->q->rq_wb, rq);
|
||||
rq->end_io(rq, error);
|
||||
} else {
|
||||
if (unlikely(blk_bidi_rq(rq)))
|
||||
|
@ -539,15 +558,12 @@ static void __blk_mq_complete_request(struct request *rq)
|
|||
bool shared = false;
|
||||
int cpu;
|
||||
|
||||
WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
|
||||
blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
|
||||
if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
|
||||
MQ_RQ_IN_FLIGHT)
|
||||
return;
|
||||
|
||||
if (rq->internal_tag != -1)
|
||||
blk_mq_sched_completed_request(rq);
|
||||
if (rq->rq_flags & RQF_STATS) {
|
||||
blk_mq_poll_stats_start(rq->q);
|
||||
blk_stat_add(rq);
|
||||
}
|
||||
|
||||
if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
|
||||
rq->q->softirq_done_fn(rq);
|
||||
|
@ -589,36 +605,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
|
|||
*srcu_idx = srcu_read_lock(hctx->srcu);
|
||||
}
|
||||
|
||||
static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* blk_mq_rq_aborted_gstate() is used from the completion path and
|
||||
* can thus be called from irq context. u64_stats_fetch in the
|
||||
* middle of update on the same CPU leads to lockup. Disable irq
|
||||
* while updating.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
u64_stats_update_begin(&rq->aborted_gstate_sync);
|
||||
rq->aborted_gstate = gstate;
|
||||
u64_stats_update_end(&rq->aborted_gstate_sync);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static u64 blk_mq_rq_aborted_gstate(struct request *rq)
|
||||
{
|
||||
unsigned int start;
|
||||
u64 aborted_gstate;
|
||||
|
||||
do {
|
||||
start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
|
||||
aborted_gstate = rq->aborted_gstate;
|
||||
} while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
|
||||
|
||||
return aborted_gstate;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_complete_request - end I/O on a request
|
||||
* @rq: the request being processed
|
||||
|
@ -629,28 +615,9 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
|
|||
**/
|
||||
void blk_mq_complete_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
|
||||
int srcu_idx;
|
||||
|
||||
if (unlikely(blk_should_fake_timeout(q)))
|
||||
if (unlikely(blk_should_fake_timeout(rq->q)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If @rq->aborted_gstate equals the current instance, timeout is
|
||||
* claiming @rq and we lost. This is synchronized through
|
||||
* hctx_lock(). See blk_mq_timeout_work() for details.
|
||||
*
|
||||
* Completion path never blocks and we can directly use RCU here
|
||||
* instead of hctx_lock() which can be either RCU or SRCU.
|
||||
* However, that would complicate paths which want to synchronize
|
||||
* against us. Let stay in sync with the issue path so that
|
||||
* hctx_lock() covers both issue and completion paths.
|
||||
*/
|
||||
hctx_lock(hctx, &srcu_idx);
|
||||
if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
|
||||
__blk_mq_complete_request(rq);
|
||||
hctx_unlock(hctx, srcu_idx);
|
||||
__blk_mq_complete_request(rq);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_complete_request);
|
||||
|
||||
|
@ -669,32 +636,18 @@ void blk_mq_start_request(struct request *rq)
|
|||
trace_block_rq_issue(q, rq);
|
||||
|
||||
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
|
||||
blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
|
||||
rq->io_start_time_ns = ktime_get_ns();
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
rq->throtl_size = blk_rq_sectors(rq);
|
||||
#endif
|
||||
rq->rq_flags |= RQF_STATS;
|
||||
wbt_issue(q->rq_wb, &rq->issue_stat);
|
||||
wbt_issue(q->rq_wb, rq);
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
|
||||
|
||||
/*
|
||||
* Mark @rq in-flight which also advances the generation number,
|
||||
* and register for timeout. Protect with a seqcount to allow the
|
||||
* timeout path to read both @rq->gstate and @rq->deadline
|
||||
* coherently.
|
||||
*
|
||||
* This is the only place where a request is marked in-flight. If
|
||||
* the timeout path reads an in-flight @rq->gstate, the
|
||||
* @rq->deadline it reads together under @rq->gstate_seq is
|
||||
* guaranteed to be the matching one.
|
||||
*/
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&rq->gstate_seq);
|
||||
|
||||
blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
|
||||
blk_add_timer(rq);
|
||||
|
||||
write_seqcount_end(&rq->gstate_seq);
|
||||
preempt_enable();
|
||||
WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
|
||||
|
||||
if (q->dma_drain_size && blk_rq_bytes(rq)) {
|
||||
/*
|
||||
|
@ -707,11 +660,6 @@ void blk_mq_start_request(struct request *rq)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_mq_start_request);
|
||||
|
||||
/*
|
||||
* When we reach here because queue is busy, it's safe to change the state
|
||||
* to IDLE without checking @rq->aborted_gstate because we should still be
|
||||
* holding the RCU read lock and thus protected against timeout.
|
||||
*/
|
||||
static void __blk_mq_requeue_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
|
@ -719,10 +667,10 @@ static void __blk_mq_requeue_request(struct request *rq)
|
|||
blk_mq_put_driver_tag(rq);
|
||||
|
||||
trace_block_rq_requeue(q, rq);
|
||||
wbt_requeue(q->rq_wb, &rq->issue_stat);
|
||||
wbt_requeue(q->rq_wb, rq);
|
||||
|
||||
if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
|
||||
blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
|
||||
if (blk_mq_request_started(rq)) {
|
||||
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
|
||||
if (q->dma_drain_size && blk_rq_bytes(rq))
|
||||
rq->nr_phys_segments--;
|
||||
}
|
||||
|
@ -820,101 +768,79 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_mq_tag_to_rq);
|
||||
|
||||
struct blk_mq_timeout_data {
|
||||
unsigned long next;
|
||||
unsigned int next_set;
|
||||
unsigned int nr_expired;
|
||||
};
|
||||
|
||||
static void blk_mq_rq_timed_out(struct request *req, bool reserved)
|
||||
{
|
||||
const struct blk_mq_ops *ops = req->q->mq_ops;
|
||||
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
|
||||
if (req->q->mq_ops->timeout) {
|
||||
enum blk_eh_timer_return ret;
|
||||
|
||||
req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
|
||||
|
||||
if (ops->timeout)
|
||||
ret = ops->timeout(req, reserved);
|
||||
|
||||
switch (ret) {
|
||||
case BLK_EH_HANDLED:
|
||||
__blk_mq_complete_request(req);
|
||||
break;
|
||||
case BLK_EH_RESET_TIMER:
|
||||
/*
|
||||
* As nothing prevents from completion happening while
|
||||
* ->aborted_gstate is set, this may lead to ignored
|
||||
* completions and further spurious timeouts.
|
||||
*/
|
||||
blk_mq_rq_update_aborted_gstate(req, 0);
|
||||
blk_add_timer(req);
|
||||
break;
|
||||
case BLK_EH_NOT_HANDLED:
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR "block: bad eh return: %d\n", ret);
|
||||
break;
|
||||
ret = req->q->mq_ops->timeout(req, reserved);
|
||||
if (ret == BLK_EH_DONE)
|
||||
return;
|
||||
WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
|
||||
}
|
||||
|
||||
blk_add_timer(req);
|
||||
}
|
||||
|
||||
static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
|
||||
{
|
||||
unsigned long deadline;
|
||||
|
||||
if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
|
||||
return false;
|
||||
|
||||
deadline = blk_rq_deadline(rq);
|
||||
if (time_after_eq(jiffies, deadline))
|
||||
return true;
|
||||
|
||||
if (*next == 0)
|
||||
*next = deadline;
|
||||
else if (time_after(*next, deadline))
|
||||
*next = deadline;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq, void *priv, bool reserved)
|
||||
{
|
||||
struct blk_mq_timeout_data *data = priv;
|
||||
unsigned long gstate, deadline;
|
||||
int start;
|
||||
unsigned long *next = priv;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
|
||||
/*
|
||||
* Just do a quick check if it is expired before locking the request in
|
||||
* so we're not unnecessarilly synchronizing across CPUs.
|
||||
*/
|
||||
if (!blk_mq_req_expired(rq, next))
|
||||
return;
|
||||
|
||||
/* read coherent snapshots of @rq->state_gen and @rq->deadline */
|
||||
while (true) {
|
||||
start = read_seqcount_begin(&rq->gstate_seq);
|
||||
gstate = READ_ONCE(rq->gstate);
|
||||
deadline = blk_rq_deadline(rq);
|
||||
if (!read_seqcount_retry(&rq->gstate_seq, start))
|
||||
break;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/* if in-flight && overdue, mark for abortion */
|
||||
if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
|
||||
time_after_eq(jiffies, deadline)) {
|
||||
blk_mq_rq_update_aborted_gstate(rq, gstate);
|
||||
data->nr_expired++;
|
||||
hctx->nr_expired++;
|
||||
} else if (!data->next_set || time_after(data->next, deadline)) {
|
||||
data->next = deadline;
|
||||
data->next_set = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq, void *priv, bool reserved)
|
||||
{
|
||||
/*
|
||||
* We marked @rq->aborted_gstate and waited for RCU. If there were
|
||||
* completions that we lost to, they would have finished and
|
||||
* updated @rq->gstate by now; otherwise, the completion path is
|
||||
* now guaranteed to see @rq->aborted_gstate and yield. If
|
||||
* @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
|
||||
* We have reason to believe the request may be expired. Take a
|
||||
* reference on the request to lock this request lifetime into its
|
||||
* currently allocated context to prevent it from being reallocated in
|
||||
* the event the completion by-passes this timeout handler.
|
||||
*
|
||||
* If the reference was already released, then the driver beat the
|
||||
* timeout handler to posting a natural completion.
|
||||
*/
|
||||
if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
|
||||
READ_ONCE(rq->gstate) == rq->aborted_gstate)
|
||||
if (!refcount_inc_not_zero(&rq->ref))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The request is now locked and cannot be reallocated underneath the
|
||||
* timeout handler's processing. Re-verify this exact request is truly
|
||||
* expired; if it is not expired, then the request was completed and
|
||||
* reallocated as a new request.
|
||||
*/
|
||||
if (blk_mq_req_expired(rq, next))
|
||||
blk_mq_rq_timed_out(rq, reserved);
|
||||
if (refcount_dec_and_test(&rq->ref))
|
||||
__blk_mq_free_request(rq);
|
||||
}
|
||||
|
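The timeout rework above replaces the generation-number/seqcount scheme with a plain per-request refcount: the timeout path pins the request with refcount_inc_not_zero(), re-checks the deadline, and only frees on the final put. The same "pin an object that a concurrent completion may free" pattern, reduced to a standalone sketch with C11 atomics (the names and the simplified free path are illustrative, not kernel code):

/* Illustrative userspace analogue of the refcount pinning done above. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct req {
    atomic_int ref;        /* starts at 1 when the request is allocated */
    /* ... payload ... */
};

static void req_free(struct req *rq) { free(rq); }

static void req_put(struct req *rq)
{
    if (atomic_fetch_sub(&rq->ref, 1) == 1)
        req_free(rq);
}

/* Like refcount_inc_not_zero(): succeed only if the object is still live. */
static bool req_tryget(struct req *rq)
{
    int old = atomic_load(&rq->ref);

    while (old != 0) {
        if (atomic_compare_exchange_weak(&rq->ref, &old, old + 1))
            return true;
    }
    return false;
}

static void timeout_scan_one(struct req *rq)
{
    if (!req_tryget(rq))
        return;        /* completion already dropped the last reference */

    /* ... re-check the deadline and time the request out if still expired ... */

    req_put(rq);       /* may free if completion raced and did its put */
}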
||||
static void blk_mq_timeout_work(struct work_struct *work)
|
||||
{
|
||||
struct request_queue *q =
|
||||
container_of(work, struct request_queue, timeout_work);
|
||||
struct blk_mq_timeout_data data = {
|
||||
.next = 0,
|
||||
.next_set = 0,
|
||||
.nr_expired = 0,
|
||||
};
|
||||
unsigned long next = 0;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
|
@ -934,39 +860,10 @@ static void blk_mq_timeout_work(struct work_struct *work)
|
|||
if (!percpu_ref_tryget(&q->q_usage_counter))
|
||||
return;
|
||||
|
||||
/* scan for the expired ones and set their ->aborted_gstate */
|
||||
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
|
||||
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
|
||||
|
||||
if (data.nr_expired) {
|
||||
bool has_rcu = false;
|
||||
|
||||
/*
|
||||
* Wait till everyone sees ->aborted_gstate. The
|
||||
* sequential waits for SRCUs aren't ideal. If this ever
|
||||
* becomes a problem, we can add per-hw_ctx rcu_head and
|
||||
* wait in parallel.
|
||||
*/
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
if (!hctx->nr_expired)
|
||||
continue;
|
||||
|
||||
if (!(hctx->flags & BLK_MQ_F_BLOCKING))
|
||||
has_rcu = true;
|
||||
else
|
||||
synchronize_srcu(hctx->srcu);
|
||||
|
||||
hctx->nr_expired = 0;
|
||||
}
|
||||
if (has_rcu)
|
||||
synchronize_rcu();
|
||||
|
||||
/* terminate the ones we won */
|
||||
blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
|
||||
}
|
||||
|
||||
if (data.next_set) {
|
||||
data.next = blk_rq_timeout(round_jiffies_up(data.next));
|
||||
mod_timer(&q->timeout, data.next);
|
||||
if (next != 0) {
|
||||
mod_timer(&q->timeout, next);
|
||||
} else {
|
||||
/*
|
||||
* Request timeouts are handled as a forward rolling timer. If
|
||||
|
@ -1029,7 +926,7 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
|
|||
struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
|
||||
|
||||
spin_lock(&ctx->lock);
|
||||
if (unlikely(!list_empty(&ctx->rq_list))) {
|
||||
if (!list_empty(&ctx->rq_list)) {
|
||||
dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
|
||||
list_del_init(&dispatch_data->rq->queuelist);
|
||||
if (list_empty(&ctx->rq_list))
|
||||
|
@ -1716,15 +1613,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
|
|||
blk_account_io_start(rq, true);
|
||||
}
|
||||
|
||||
static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *ctx,
|
||||
struct request *rq)
|
||||
{
|
||||
spin_lock(&ctx->lock);
|
||||
__blk_mq_insert_request(hctx, rq, false);
|
||||
spin_unlock(&ctx->lock);
|
||||
}
|
||||
|
||||
static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
|
||||
{
|
||||
if (rq->tag != -1)
|
||||
|
@ -1882,7 +1770,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
|||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
wbt_track(&rq->issue_stat, wb_acct);
|
||||
wbt_track(rq, wb_acct);
|
||||
|
||||
cookie = request_to_qc_t(data.hctx, rq);
|
||||
|
||||
|
@ -1949,15 +1837,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
|||
blk_mq_put_ctx(data.ctx);
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
blk_mq_try_issue_directly(data.hctx, rq, &cookie);
|
||||
} else if (q->elevator) {
|
||||
blk_mq_put_ctx(data.ctx);
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
blk_mq_sched_insert_request(rq, false, true, true);
|
||||
} else {
|
||||
blk_mq_put_ctx(data.ctx);
|
||||
blk_mq_bio_to_request(rq, bio);
|
||||
blk_mq_queue_io(data.hctx, data.ctx, rq);
|
||||
blk_mq_run_hw_queue(data.hctx, true);
|
||||
blk_mq_sched_insert_request(rq, false, true, true);
|
||||
}
|
||||
|
||||
return cookie;
|
||||
|
@ -2056,15 +1939,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
|
|||
return ret;
|
||||
}
|
||||
|
||||
seqcount_init(&rq->gstate_seq);
|
||||
u64_stats_init(&rq->aborted_gstate_sync);
|
||||
/*
|
||||
* start gstate with gen 1 instead of 0, otherwise it will be equal
|
||||
* to aborted_gstate, and be identified timed out by
|
||||
* blk_mq_terminate_expired.
|
||||
*/
|
||||
WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
|
||||
|
||||
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2365,6 +2240,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
|||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
cpumask_clear(hctx->cpumask);
|
||||
hctx->nr_ctx = 0;
|
||||
hctx->dispatch_from = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2697,7 +2573,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
|||
if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
|
||||
int ret;
|
||||
|
||||
ret = blk_mq_sched_init(q);
|
||||
ret = elevator_init_mq(q);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
|
|
@ -30,20 +30,6 @@ struct blk_mq_ctx {
|
|||
struct kobject kobj;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/*
|
||||
* Bits for request->gstate. The lower two bits carry MQ_RQ_* state value
|
||||
* and the upper bits the generation number.
|
||||
*/
|
||||
enum mq_rq_state {
|
||||
MQ_RQ_IDLE = 0,
|
||||
MQ_RQ_IN_FLIGHT = 1,
|
||||
MQ_RQ_COMPLETE = 2,
|
||||
|
||||
MQ_RQ_STATE_BITS = 2,
|
||||
MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1,
|
||||
MQ_RQ_GEN_INC = 1 << MQ_RQ_STATE_BITS,
|
||||
};
|
||||
|
||||
void blk_mq_freeze_queue(struct request_queue *q);
|
||||
void blk_mq_free_queue(struct request_queue *q);
|
||||
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
|
||||
|
@ -107,33 +93,9 @@ void blk_mq_release(struct request_queue *q);
|
|||
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
|
||||
* @rq: target request.
|
||||
*/
|
||||
static inline int blk_mq_rq_state(struct request *rq)
|
||||
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
|
||||
{
|
||||
return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request
|
||||
* @rq: target request.
|
||||
* @state: new state to set.
|
||||
*
|
||||
* Set @rq's state to @state. The caller is responsible for ensuring that
|
||||
* there are no other updaters. A request can transition into IN_FLIGHT
|
||||
* only from IDLE and doing so increments the generation number.
|
||||
*/
|
||||
static inline void blk_mq_rq_update_state(struct request *rq,
|
||||
enum mq_rq_state state)
|
||||
{
|
||||
u64 old_val = READ_ONCE(rq->gstate);
|
||||
u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
|
||||
|
||||
if (state == MQ_RQ_IN_FLIGHT) {
|
||||
WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
|
||||
new_val += MQ_RQ_GEN_INC;
|
||||
}
|
||||
|
||||
/* avoid exposing interim values */
|
||||
WRITE_ONCE(rq->gstate, new_val);
|
||||
return READ_ONCE(rq->state);
|
||||
}
|
||||
|
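With gstate gone, the request state is a single enum written with WRITE_ONCE() on the issue side and claimed with cmpxchg() on the completion side (the MQ_RQ_IN_FLIGHT to MQ_RQ_COMPLETE transition in __blk_mq_complete_request() earlier in this series). A standalone sketch of that claim-by-cmpxchg idea using C11 atomics rather than the kernel helpers; only the state names come from the diff:

/* Illustrative sketch of the single-winner state transition. */
#include <stdatomic.h>
#include <stdbool.h>

enum mq_rq_state { MQ_RQ_IDLE, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE };

struct req {
    _Atomic enum mq_rq_state state;
};

/* Only one caller can move IN_FLIGHT -> COMPLETE; everyone else backs off. */
static bool req_try_complete(struct req *rq)
{
    enum mq_rq_state expected = MQ_RQ_IN_FLIGHT;

    return atomic_compare_exchange_strong(&rq->state, &expected,
                                          MQ_RQ_COMPLETE);
}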
||||
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
|
||||
|
|
|
@ -47,19 +47,15 @@ static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
|
|||
stat->nr_samples++;
|
||||
}
|
||||
|
||||
void blk_stat_add(struct request *rq)
|
||||
void blk_stat_add(struct request *rq, u64 now)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_stat_callback *cb;
|
||||
struct blk_rq_stat *stat;
|
||||
int bucket;
|
||||
u64 now, value;
|
||||
u64 value;
|
||||
|
||||
now = __blk_stat_time(ktime_to_ns(ktime_get()));
|
||||
if (now < blk_stat_time(&rq->issue_stat))
|
||||
return;
|
||||
|
||||
value = now - blk_stat_time(&rq->issue_stat);
|
||||
value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
|
||||
|
||||
blk_throtl_stat_add(rq, value);
|
||||
|
||||
|
|
|
@ -8,21 +8,6 @@
|
|||
#include <linux/rcupdate.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
/*
|
||||
* from upper:
|
||||
* 3 bits: reserved for other usage
|
||||
* 12 bits: size
|
||||
* 49 bits: time
|
||||
*/
|
||||
#define BLK_STAT_RES_BITS 3
|
||||
#define BLK_STAT_SIZE_BITS 12
|
||||
#define BLK_STAT_RES_SHIFT (64 - BLK_STAT_RES_BITS)
|
||||
#define BLK_STAT_SIZE_SHIFT (BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
|
||||
#define BLK_STAT_TIME_MASK ((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
|
||||
#define BLK_STAT_SIZE_MASK \
|
||||
(((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT)
|
||||
#define BLK_STAT_RES_MASK (~((1ULL << BLK_STAT_RES_SHIFT) - 1))
|
||||
|
||||
/**
|
||||
* struct blk_stat_callback - Block statistics callback.
|
||||
*
|
||||
|
@ -80,35 +65,7 @@ struct blk_stat_callback {
|
|||
struct blk_queue_stats *blk_alloc_queue_stats(void);
|
||||
void blk_free_queue_stats(struct blk_queue_stats *);
|
||||
|
||||
void blk_stat_add(struct request *);
|
||||
|
||||
static inline u64 __blk_stat_time(u64 time)
|
||||
{
|
||||
return time & BLK_STAT_TIME_MASK;
|
||||
}
|
||||
|
||||
static inline u64 blk_stat_time(struct blk_issue_stat *stat)
|
||||
{
|
||||
return __blk_stat_time(stat->stat);
|
||||
}
|
||||
|
||||
static inline sector_t blk_capped_size(sector_t size)
|
||||
{
|
||||
return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1);
|
||||
}
|
||||
|
||||
static inline sector_t blk_stat_size(struct blk_issue_stat *stat)
|
||||
{
|
||||
return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT;
|
||||
}
|
||||
|
||||
static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
|
||||
sector_t size)
|
||||
{
|
||||
stat->stat = (stat->stat & BLK_STAT_RES_MASK) |
|
||||
(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) |
|
||||
(((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
|
||||
}
|
||||
void blk_stat_add(struct request *rq, u64 now);
|
||||
|
||||
/* record time/size info in request but not add a callback */
|
||||
void blk_stat_enable_accounting(struct request_queue *q);
|
||||
|
|
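The constants deleted above describe the old blk_issue_stat encoding: one u64 carrying 3 reserved bits, a 12-bit capped size and a 49-bit issue time, unpacked by the now-removed blk_stat_time()/blk_stat_size() helpers. Since requests now carry full start_time_ns/io_start_time_ns fields, the packing goes away; for reference, the old scheme corresponded to arithmetic along these lines (a userspace sketch built from the masks shown above, not the kernel code):

/* Illustrative sketch of the removed | 3 res | 12 size | 49 time | layout. */
#include <stdint.h>
#include <stdio.h>

#define RES_BITS   3
#define SIZE_BITS  12
#define RES_SHIFT  (64 - RES_BITS)
#define SIZE_SHIFT (RES_SHIFT - SIZE_BITS)
#define TIME_MASK  ((1ULL << SIZE_SHIFT) - 1)
#define SIZE_MASK  (((1ULL << SIZE_BITS) - 1) << SIZE_SHIFT)

int main(void)
{
    uint64_t time_ns = 123456789ULL;
    uint64_t sectors = 2048;                    /* capped to 12 bits */
    uint64_t stat = ((sectors & ((1ULL << SIZE_BITS) - 1)) << SIZE_SHIFT) |
                    (time_ns & TIME_MASK);

    printf("time=%llu size=%llu\n",
           (unsigned long long)(stat & TIME_MASK),
           (unsigned long long)((stat & SIZE_MASK) >> SIZE_SHIFT));
    return 0;
}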
|
@ -491,188 +491,198 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
|
|||
return count;
|
||||
}
|
||||
|
||||
+static ssize_t queue_fua_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
+}
+
 static ssize_t queue_dax_show(struct request_queue *q, char *page)
 {
	return queue_var_show(blk_queue_dax(q), page);
 }

-	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "nr_requests", .mode = 0644 },
-	.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "read_ahead_kb", .mode = 0644 },
-	.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "max_sectors_kb", .mode = 0644 },
-	.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
+	.attr = {.name = "max_hw_sectors_kb", .mode = 0444 },
-	.attr = {.name = "max_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_segments", .mode = 0444 },
-	.attr = {.name = "max_discard_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_discard_segments", .mode = 0444 },
-	.attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_integrity_segments", .mode = 0444 },
-	.attr = {.name = "max_segment_size", .mode = S_IRUGO },
+	.attr = {.name = "max_segment_size", .mode = 0444 },
-	.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "scheduler", .mode = 0644 },
-	.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
+	.attr = {.name = "hw_sector_size", .mode = 0444 },
-	.attr = {.name = "logical_block_size", .mode = S_IRUGO },
+	.attr = {.name = "logical_block_size", .mode = 0444 },
-	.attr = {.name = "physical_block_size", .mode = S_IRUGO },
+	.attr = {.name = "physical_block_size", .mode = 0444 },
-	.attr = {.name = "chunk_sectors", .mode = S_IRUGO },
+	.attr = {.name = "chunk_sectors", .mode = 0444 },
-	.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+	.attr = {.name = "minimum_io_size", .mode = 0444 },
-	.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+	.attr = {.name = "optimal_io_size", .mode = 0444 },
-	.attr = {.name = "discard_granularity", .mode = S_IRUGO },
+	.attr = {.name = "discard_granularity", .mode = 0444 },
-	.attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO },
+	.attr = {.name = "discard_max_hw_bytes", .mode = 0444 },
-	.attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "discard_max_bytes", .mode = 0644 },
-	.attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
+	.attr = {.name = "discard_zeroes_data", .mode = 0444 },
-	.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+	.attr = {.name = "write_same_max_bytes", .mode = 0444 },
-	.attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO },
+	.attr = {.name = "write_zeroes_max_bytes", .mode = 0444 },
-	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "rotational", .mode = 0644 },
-	.attr = {.name = "zoned", .mode = S_IRUGO },
+	.attr = {.name = "zoned", .mode = 0444 },
-	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "nomerges", .mode = 0644 },
-	.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "rq_affinity", .mode = 0644 },
-	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "iostats", .mode = 0644 },
-	.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "add_random", .mode = 0644 },
-	.attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "io_poll", .mode = 0644 },
-	.attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "io_poll_delay", .mode = 0644 },
-	.attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "write_cache", .mode = 0644 },

+static struct queue_sysfs_entry queue_fua_entry = {
+	.attr = {.name = "fua", .mode = 0444 },
+	.show = queue_fua_show,
+};

-	.attr = {.name = "dax", .mode = S_IRUGO },
+	.attr = {.name = "dax", .mode = 0444 },
-	.attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "wbt_lat_usec", .mode = 0644 },

 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	.attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "throttle_sample_time", .mode = 0644 },

@@ -708,6 +718,7 @@ static struct attribute *default_attrs[] = {
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
 	&queue_wc_entry.attr,
+	&queue_fua_entry.attr,
 	&queue_dax_entry.attr,
 	&queue_wb_lat_entry.attr,
 	&queue_poll_delay_entry.attr,

@@ -813,8 +824,7 @@ static void __blk_release_queue(struct work_struct *work)
 	if (q->mq_ops)
		blk_mq_debugfs_unregister(q);

-	if (q->bio_split)
-		bioset_free(q->bio_split);
+	bioset_exit(&q->bio_split);

 	ida_simple_remove(&blk_queue_ida, q->id);
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
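The permission changes above are a mechanical S_IRUGO/S_IWUSR to octal conversion; a tiny user-space check of the equivalence being relied on (S_IRUGO itself is kernel-only shorthand, so it is spelled out here):

#include <assert.h>
#include <sys/stat.h>

int main(void)
{
	/* S_IRUGO is the kernel's shorthand for S_IRUSR|S_IRGRP|S_IROTH */
	assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444);
	/* read-for-all plus owner-write is the 0644 used for writable attributes */
	assert((S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR) == 0644);
	return 0;
}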
@@ -36,8 +36,6 @@ static int throtl_quantum = 32;
  */
 #define LATENCY_FILTERED_HD (1000L) /* 1ms */
-#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
-
 static struct blkcg_policy blkcg_policy_throtl;

 /* A workqueue to queue throttle related work */

@@ -821,7 +819,7 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
 	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
		return false;

-	return 1;
+	return true;
 }

@@ -931,7 +929,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
 	if (wait)
		*wait = jiffy_wait;
-	return 0;
+	return false;
 }

@@ -974,7 +972,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	if (wait)
		*wait = jiffy_wait;
-	return 0;
+	return false;
 }

@@ -1024,7 +1022,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	    tg_with_in_iops_limit(tg, bio, &iops_wait)) {
		if (wait)
			*wait = 0;
-		return 1;
+		return true;
 	}

@@ -1035,7 +1033,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	if (time_before(tg->slice_end[rw], jiffies + max_wait))
		throtl_extend_slice(tg, rw, jiffies + max_wait);

-	return 0;
+	return false;
 }

@@ -1209,7 +1207,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
 	while (1) {
		struct throtl_grp *tg = throtl_rb_first(parent_sq);
-		struct throtl_service_queue *sq = &tg->service_queue;
+		struct throtl_service_queue *sq;

		if (!tg)
			break;

@@ -1221,6 +1219,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
		nr_disp += throtl_dispatch_tg(tg);

+		sq = &tg->service_queue;
		if (sq->nr_queued[0] || sq->nr_queued[1])
			tg_update_disptime(tg);

@@ -2139,7 +2138,7 @@ static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
		bio->bi_cg_private = tg;
		blkg_get(tg_to_blkg(tg));
 	}
-	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 #endif
 }

@@ -2251,7 +2250,7 @@ out:
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (throttled || !td->track_bio_latency)
-		bio->bi_issue_stat.stat |= SKIP_LATENCY;
+		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
 #endif
 	return throttled;
 }

@@ -2281,8 +2280,7 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
 	struct request_queue *q = rq->q;
 	struct throtl_data *td = q->td;

-	throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
-		req_op(rq), time_ns >> 10);
+	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
 }

@@ -2302,8 +2300,8 @@ void blk_throtl_bio_endio(struct bio *bio)
 	finish_time_ns = ktime_get_ns();
 	tg->last_finish_time = finish_time_ns >> 10;

-	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
-	finish_time = __blk_stat_time(finish_time_ns) >> 10;
+	start_time = bio_issue_time(&bio->bi_issue) >> 10;
+	finish_time = __bio_issue_time(finish_time_ns) >> 10;
 	if (!start_time || finish_time <= start_time) {
		blkg_put(tg_to_blkg(tg));
		return;

@@ -2311,16 +2309,15 @@ void blk_throtl_bio_endio(struct bio *bio)
 	lat = finish_time - start_time;
 	/* this is only for bio based driver */
-	if (!(bio->bi_issue_stat.stat & SKIP_LATENCY))
-		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
-				     bio_op(bio), lat);
+	if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
+		throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
+				     bio_op(bio), lat);

 	if (tg->latency_target && lat >= tg->td->filtered_latency) {
		int bucket;
		unsigned int threshold;

-		bucket = request_bucket_index(
-			blk_stat_size(&bio->bi_issue_stat));
+		bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
		threshold = tg->td->avg_buckets[rw][bucket].latency +
			tg->latency_target;
		if (lat > threshold)
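The bi_issue_stat to bi_issue conversion above relies on a bio_issue value that packs issue time and size into one 64-bit word. A rough stand-alone sketch of that style of packing; the bit split and names here are illustrative, and the real structure also reserves flag bits such as the THROTL_SKIP_LATENCY bit used above:

#include <stdint.h>

/* illustrative split: low 12 bits carry the size, the rest the timestamp */
#define ISSUE_SIZE_BITS 12
#define ISSUE_SIZE_MASK ((UINT64_C(1) << ISSUE_SIZE_BITS) - 1)

struct issue { uint64_t value; };

static inline void issue_init(struct issue *is, uint64_t now_ns, uint64_t size)
{
	if (size > ISSUE_SIZE_MASK)	/* clamp oversized values */
		size = ISSUE_SIZE_MASK;
	is->value = (now_ns << ISSUE_SIZE_BITS) | size;
}

static inline uint64_t issue_time(const struct issue *is)
{
	return is->value >> ISSUE_SIZE_BITS;
}

static inline uint64_t issue_size(const struct issue *is)
{
	return is->value & ISSUE_SIZE_MASK;
}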
@@ -86,14 +86,11 @@ static void blk_rq_timed_out(struct request *req)
 	if (q->rq_timed_out_fn)
		ret = q->rq_timed_out_fn(req);
 	switch (ret) {
-	case BLK_EH_HANDLED:
-		__blk_complete_request(req);
-		break;
 	case BLK_EH_RESET_TIMER:
		blk_add_timer(req);
		blk_clear_rq_complete(req);
		break;
-	case BLK_EH_NOT_HANDLED:
+	case BLK_EH_DONE:
		/*
		 * LLD handles this for now but in the future
		 * we can send a request msg to abort the command

@@ -214,7 +211,6 @@ void blk_add_timer(struct request *req)
		req->timeout = q->rq_timeout;

 	blk_rq_set_deadline(req, jiffies + req->timeout);
-	req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;

 	/*
	 * Only the non-mq case needs to add the request to a protected list.
diff --git a/block/blk-wbt.c b/block/blk-wbt.c

@@ -29,6 +29,26 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>

+static inline void wbt_clear_state(struct request *rq)
+{
+	rq->wbt_flags = 0;
+}
+
+static inline enum wbt_flags wbt_flags(struct request *rq)
+{
+	return rq->wbt_flags;
+}
+
+static inline bool wbt_is_tracked(struct request *rq)
+{
+	return rq->wbt_flags & WBT_TRACKED;
+}
+
+static inline bool wbt_is_read(struct request *rq)
+{
+	return rq->wbt_flags & WBT_READ;
+}
+
 enum {

@@ -101,9 +121,15 @@ static bool wb_recent_wait(struct rq_wb *rwb)
	return time_before(jiffies, wb->dirty_sleep + HZ);
 }

-static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd)
+static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
+					  enum wbt_flags wb_acct)
 {
-	return &rwb->rq_wait[is_kswapd];
+	if (wb_acct & WBT_KSWAPD)
+		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+	else if (wb_acct & WBT_DISCARD)
+		return &rwb->rq_wait[WBT_RWQ_DISCARD];
+
+	return &rwb->rq_wait[WBT_RWQ_BG];
 }

@@ -126,7 +152,7 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
 	if (!(wb_acct & WBT_TRACKED))
		return;

-	rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD);
+	rqw = get_rq_wait(rwb, wb_acct);
 	inflight = atomic_dec_return(&rqw->inflight);

@@ -139,10 +165,13 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
 	/*
-	 * If the device does write back caching, drop further down
-	 * before we wake people up.
+	 * For discards, our limit is always the background. For writes, if
+	 * the device does write back caching, drop further down before we
+	 * wake people up.
	 */
-	if (rwb->wc && !wb_recent_wait(rwb))
+	if (wb_acct & WBT_DISCARD)
+		limit = rwb->wb_background;
+	else if (rwb->wc && !wb_recent_wait(rwb))
		limit = 0;
	else
		limit = rwb->wb_normal;

@@ -165,24 +194,24 @@
  * Called on completion of a request. Note that it's also called when
  * a request is merged, when the request gets freed.
  */
-void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_done(struct rq_wb *rwb, struct request *rq)
 {
	if (!rwb)
		return;

-	if (!wbt_is_tracked(stat)) {
-		if (rwb->sync_cookie == stat) {
+	if (!wbt_is_tracked(rq)) {
+		if (rwb->sync_cookie == rq) {
			rwb->sync_issue = 0;
			rwb->sync_cookie = NULL;
		}

-		if (wbt_is_read(stat))
+		if (wbt_is_read(rq))
			wb_timestamp(rwb, &rwb->last_comp);
	} else {
-		WARN_ON_ONCE(stat == rwb->sync_cookie);
-		__wbt_done(rwb, wbt_stat_to_mask(stat));
+		WARN_ON_ONCE(rq == rwb->sync_cookie);
+		__wbt_done(rwb, wbt_flags(rq));
	}
-	wbt_clear_state(stat);
+	wbt_clear_state(rq);
 }

@@ -479,6 +508,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 {
	unsigned int limit;

+	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
+		return rwb->wb_background;
+
	/*
	 * At this point we know it's a buffered write. If this is
	 * kswapd trying to free memory, or REQ_SYNC is set, then

@@ -529,11 +561,12 @@
  * Block if we will exceed our limit, or if we are currently waiting for
  * the timer to kick off queuing again.
  */
-static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
+static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
+		       unsigned long rw, spinlock_t *lock)
	__releases(lock)
	__acquires(lock)
 {
-	struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
+	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
	DEFINE_WAIT(wait);

	if (may_queue(rwb, rqw, &wait, rw))

@@ -559,21 +592,20 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
 static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
 {
-	const int op = bio_op(bio);
-
-	/*
-	 * If not a WRITE, do nothing
-	 */
-	if (op != REQ_OP_WRITE)
-		return false;
-
-	/*
-	 * Don't throttle WRITE_ODIRECT
-	 */
-	if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE))
-		return false;
-
-	return true;
+	switch (bio_op(bio)) {
+	case REQ_OP_WRITE:
+		/*
+		 * Don't throttle WRITE_ODIRECT
+		 */
+		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
+		    (REQ_SYNC | REQ_IDLE))
+			return false;
+		/* fallthrough */
+	case REQ_OP_DISCARD:
+		return true;
+	default:
+		return false;
+	}
 }

@@ -584,7 +616,7 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
 enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 {
-	unsigned int ret = 0;
+	enum wbt_flags ret = 0;

	if (!rwb_enabled(rwb))
		return 0;

@@ -598,41 +630,42 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
		return ret;
	}

-	__wbt_wait(rwb, bio->bi_opf, lock);
+	if (current_is_kswapd())
+		ret |= WBT_KSWAPD;
+	if (bio_op(bio) == REQ_OP_DISCARD)
+		ret |= WBT_DISCARD;
+
+	__wbt_wait(rwb, ret, bio->bi_opf, lock);

	if (!blk_stat_is_active(rwb->cb))
		rwb_arm_timer(rwb);

-	if (current_is_kswapd())
-		ret |= WBT_KSWAPD;
-
	return ret | WBT_TRACKED;
 }

-void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_issue(struct rq_wb *rwb, struct request *rq)
 {
	if (!rwb_enabled(rwb))
		return;

	/*
-	 * Track sync issue, in case it takes a long time to complete. Allows
-	 * us to react quicker, if a sync IO takes a long time to complete.
-	 * Note that this is just a hint. 'stat' can go away when the
-	 * request completes, so it's important we never dereference it. We
-	 * only use the address to compare with, which is why we store the
-	 * sync_issue time locally.
+	 * Track sync issue, in case it takes a long time to complete. Allows us
+	 * to react quicker, if a sync IO takes a long time to complete. Note
+	 * that this is just a hint. The request can go away when it completes,
+	 * so it's important we never dereference it. We only use the address to
+	 * compare with, which is why we store the sync_issue time locally.
	 */
-	if (wbt_is_read(stat) && !rwb->sync_issue) {
-		rwb->sync_cookie = stat;
-		rwb->sync_issue = blk_stat_time(stat);
+	if (wbt_is_read(rq) && !rwb->sync_issue) {
+		rwb->sync_cookie = rq;
+		rwb->sync_issue = rq->io_start_time_ns;
	}
 }

-void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 {
	if (!rwb_enabled(rwb))
		return;
-	if (stat == rwb->sync_cookie) {
+	if (rq == rwb->sync_cookie) {
		rwb->sync_issue = 0;
		rwb->sync_cookie = NULL;
	}

@@ -701,7 +734,7 @@ static int wbt_data_dir(const struct request *rq)
 	if (op == REQ_OP_READ)
		return READ;
-	else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+	else if (op_is_write(op))
		return WRITE;

	/* don't account */

@@ -713,8 +746,6 @@ int wbt_init(struct request_queue *q)
 	struct rq_wb *rwb;
 	int i;

-	BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
-
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
		return -ENOMEM;
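The get_rq_wait() change above routes each request to one of three inflight counters based on its wbt flags. A stand-alone restatement of just that mapping, compilable in user space (flag values follow the blk-wbt.h hunk that follows; everything else is illustrative):

#include <stdio.h>

enum wbt_flags { WBT_TRACKED = 1, WBT_READ = 2, WBT_KSWAPD = 4, WBT_DISCARD = 8 };
enum { RWQ_BG = 0, RWQ_KSWAPD, RWQ_DISCARD, NUM_RWQ };

/* mirror of the selection in get_rq_wait(): kswapd and discard traffic get
 * their own inflight counters, everything else shares the background one */
static int rwq_index(unsigned int flags)
{
	if (flags & WBT_KSWAPD)
		return RWQ_KSWAPD;
	if (flags & WBT_DISCARD)
		return RWQ_DISCARD;
	return RWQ_BG;
}

int main(void)
{
	printf("%d %d %d\n",
	       rwq_index(WBT_TRACKED),
	       rwq_index(WBT_TRACKED | WBT_KSWAPD),
	       rwq_index(WBT_TRACKED | WBT_DISCARD));	/* prints: 0 1 2 */
	return 0;
}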
@@ -14,12 +14,16 @@ enum wbt_flags {
 	WBT_TRACKED		= 1,	/* write, tracked for throttling */
 	WBT_READ		= 2,	/* read */
 	WBT_KSWAPD		= 4,	/* write, from kswapd */
+	WBT_DISCARD		= 8,	/* discard */

-	WBT_NR_BITS		= 3,	/* number of bits */
+	WBT_NR_BITS		= 4,	/* number of bits */
 };

 enum {
-	WBT_NUM_RWQ		= 2,
+	WBT_RWQ_BG		= 0,
+	WBT_RWQ_KSWAPD,
+	WBT_RWQ_DISCARD,
+	WBT_NUM_RWQ,
 };

@@ -31,31 +35,6 @@ enum {
 	WBT_STATE_ON_MANUAL	= 2,
 };

-static inline void wbt_clear_state(struct blk_issue_stat *stat)
-{
-	stat->stat &= ~BLK_STAT_RES_MASK;
-}
-
-static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
-{
-	return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
-}
-
-static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
-{
-	stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
-}
-
-static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
-{
-	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
-}
-
-static inline bool wbt_is_read(struct blk_issue_stat *stat)
-{
-	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
-}
-
 struct rq_wait {
 	wait_queue_head_t wait;
 	atomic_t inflight;

@@ -84,7 +63,7 @@ struct rq_wb {
 	struct blk_stat_callback *cb;

-	s64 sync_issue;
+	u64 sync_issue;
 	void *sync_cookie;

 	unsigned int wc;

@@ -109,14 +88,19 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 #ifdef CONFIG_BLK_WBT

+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+	rq->wbt_flags |= flags;
+}
+
 void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct blk_issue_stat *);
+void wbt_done(struct rq_wb *, struct request *);
 enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
 int wbt_init(struct request_queue *);
 void wbt_exit(struct request_queue *);
 void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
-void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
+void wbt_requeue(struct rq_wb *, struct request *);
+void wbt_issue(struct rq_wb *, struct request *);
 void wbt_disable_default(struct request_queue *);
 void wbt_enable_default(struct request_queue *);

@@ -127,10 +111,13 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 #else

+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+}
 static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
 {
 }
-static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
 {
 }
 static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,

@@ -148,10 +135,10 @@ static inline void wbt_exit(struct request_queue *q)
 static inline void wbt_update_limits(struct rq_wb *rwb)
 {
 }
-static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 {
 }
-static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
 {
 }
 static inline void wbt_disable_default(struct request_queue *q)
@@ -328,7 +328,11 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!rep.nr_zones)
		return -EINVAL;

-	zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
+	if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
+		return -ERANGE;
+
+	zones = kvmalloc(rep.nr_zones * sizeof(struct blk_zone),
+			 GFP_KERNEL | __GFP_ZERO);
 	if (!zones)
		return -ENOMEM;

@@ -350,7 +354,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	}

 out:
-	kfree(zones);
+	kvfree(zones);

 	return ret;
 }
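The kcalloc to kvmalloc switch above has to add its own overflow check because kvmalloc takes a single, already-multiplied size. The same guard in a stand-alone sketch (plain calloc standing in for the kernel allocator; struct zone is a stand-in for struct blk_zone):

#include <errno.h>
#include <limits.h>
#include <stdlib.h>

struct zone { long start, len, wp; };	/* stand-in for struct blk_zone */

static struct zone *alloc_zone_array(unsigned int nr_zones, int *err)
{
	struct zone *zones;

	/* reject counts whose byte size would overflow the multiplication */
	if (nr_zones > INT_MAX / sizeof(struct zone)) {
		*err = -ERANGE;
		return NULL;
	}
	zones = calloc(nr_zones, sizeof(*zones));
	if (!zones)
		*err = -ENOMEM;
	return zones;
}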
@@ -186,7 +186,7 @@ unsigned int blk_plug_queued_count(struct request_queue *q);
 void blk_account_io_start(struct request *req, bool new_io);
 void blk_account_io_completion(struct request *req, unsigned int bytes);
-void blk_account_io_done(struct request *req);
+void blk_account_io_done(struct request *req, u64 now);

@@ -231,6 +231,9 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
		e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
 }

+int elevator_init(struct request_queue *);
+int elevator_init_mq(struct request_queue *q);
+void elevator_exit(struct request_queue *, struct elevator_queue *);
 int elv_register_queue(struct request_queue *q);
 void elv_unregister_queue(struct request_queue *q);
@@ -28,28 +28,29 @@
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16

-static struct bio_set *bounce_bio_set, *bounce_bio_split;
-static mempool_t *page_pool, *isa_page_pool;
+static struct bio_set bounce_bio_set, bounce_bio_split;
+static mempool_t page_pool, isa_page_pool;

 #if defined(CONFIG_HIGHMEM)
 static __init int init_emergency_pool(void)
 {
+	int ret;
 #if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
	if (max_pfn <= max_low_pfn)
		return 0;
 #endif

-	page_pool = mempool_create_page_pool(POOL_SIZE, 0);
-	BUG_ON(!page_pool);
+	ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
+	BUG_ON(ret);
	pr_info("pool size: %d pages\n", POOL_SIZE);

-	bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	BUG_ON(!bounce_bio_set);
-	if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+	ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	BUG_ON(ret);
+	if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
		BUG_ON(1);

-	bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
-	BUG_ON(!bounce_bio_split);
+	ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
+	BUG_ON(ret);

	return 0;
 }

@@ -63,14 +64,11 @@ __initcall(init_emergency_pool);
  */
 static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
 {
-	unsigned long flags;
	unsigned char *vto;

-	local_irq_save(flags);
	vto = kmap_atomic(to->bv_page);
	memcpy(vto + to->bv_offset, vfrom, to->bv_len);
	kunmap_atomic(vto);
-	local_irq_restore(flags);
 }

 #else /* CONFIG_HIGHMEM */

@@ -94,12 +92,14 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
  */
 int init_emergency_isa_pool(void)
 {
-	if (isa_page_pool)
+	int ret;
+
+	if (mempool_initialized(&isa_page_pool))
		return 0;

-	isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
-				       mempool_free_pages, (void *) 0);
-	BUG_ON(!isa_page_pool);
+	ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
+			   mempool_free_pages, (void *) 0);
+	BUG_ON(ret);

	pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
	return 0;

@@ -166,13 +166,13 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
 static void bounce_end_io_write(struct bio *bio)
 {
-	bounce_end_io(bio, page_pool);
+	bounce_end_io(bio, &page_pool);
 }

 static void bounce_end_io_write_isa(struct bio *bio)
 {
-	bounce_end_io(bio, isa_page_pool);
+	bounce_end_io(bio, &isa_page_pool);
 }

@@ -187,12 +187,12 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
 static void bounce_end_io_read(struct bio *bio)
 {
-	__bounce_end_io_read(bio, page_pool);
+	__bounce_end_io_read(bio, &page_pool);
 }

 static void bounce_end_io_read_isa(struct bio *bio)
 {
-	__bounce_end_io_read(bio, isa_page_pool);
+	__bounce_end_io_read(bio, &isa_page_pool);
 }

@@ -217,13 +217,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
-		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
		bio_chain(bio, *bio_orig);
		generic_make_request(*bio_orig);
		*bio_orig = bio;
	}
	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
-			bounce_bio_set);
+			&bounce_bio_set);

	bio_for_each_segment_all(to, bio, i) {
		struct page *page = to->bv_page;

@@ -250,7 +250,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	bio->bi_flags |= (1 << BIO_BOUNCED);

-	if (pool == page_pool) {
+	if (pool == &page_pool) {
		bio->bi_end_io = bounce_end_io_write;
		if (rw == READ)
			bio->bi_end_io = bounce_end_io_read;

@@ -282,10 +282,10 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	if (!(q->bounce_gfp & GFP_DMA)) {
		if (q->limits.bounce_pfn >= blk_max_pfn)
			return;
-		pool = page_pool;
+		pool = &page_pool;
	} else {
-		BUG_ON(!isa_page_pool);
-		pool = isa_page_pool;
+		BUG_ON(!mempool_initialized(&isa_page_pool));
+		pool = &isa_page_pool;
	}

	/*
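The bioset_create()/mempool_create() to bioset_init()/mempool_init() conversion above embeds the pools in static storage instead of allocating them separately. A toy version of the same design choice in plain C, where the object owns its pool by value (names and the pool itself are illustrative, not the kernel API):

#include <stdlib.h>
#include <string.h>

#define POOL_SLOTS 16

struct pool {
	size_t obj_size;
	size_t nr_free;
	void *slots[POOL_SLOTS];
};

static int pool_init(struct pool *p, size_t obj_size)
{
	memset(p, 0, sizeof(*p));
	p->obj_size = obj_size;
	while (p->nr_free < POOL_SLOTS) {
		p->slots[p->nr_free] = malloc(obj_size);
		if (!p->slots[p->nr_free])
			return -1;	/* partially filled; pool_exit() unwinds */
		p->nr_free++;
	}
	return 0;
}

static void pool_exit(struct pool *p)
{
	while (p->nr_free)
		free(p->slots[--p->nr_free]);
}

/* embedded by value: no separate allocation that can fail or dangle */
static struct pool page_pool;

int bounce_setup(void)
{
	return pool_init(&page_pool, 4096);
}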
@@ -303,11 +303,9 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req)
  * @name: device to give bsg device
  * @job_fn: bsg job handler
  * @dd_job_size: size of LLD data needed for each job
- * @release: @dev release function
  */
 struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
-		bsg_job_fn *job_fn, int dd_job_size,
-		void (*release)(struct device *))
+		bsg_job_fn *job_fn, int dd_job_size)
 {
	struct request_queue *q;
	int ret;

@@ -331,7 +329,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
 	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);

-	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
+	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
 	if (ret) {
		printk(KERN_ERR "%s: bsg interface failed to "
		       "initialize - register queue\n", dev->kobj.name);
diff --git a/block/bsg.c b/block/bsg.c

@@ -226,8 +226,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
		return ERR_PTR(ret);

 	rq = blk_get_request(q, hdr->dout_xfer_len ?
-			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			     GFP_KERNEL);
+			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq))
		return rq;

@@ -249,7 +248,7 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
		goto out;
	}

-	next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+	next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
	if (IS_ERR(next_rq)) {
		ret = PTR_ERR(next_rq);
		goto out;

@@ -650,18 +649,6 @@ static struct bsg_device *bsg_alloc_device(void)
	return bd;
 }

-static void bsg_kref_release_function(struct kref *kref)
-{
-	struct bsg_class_device *bcd =
-		container_of(kref, struct bsg_class_device, ref);
-	struct device *parent = bcd->parent;
-
-	if (bcd->release)
-		bcd->release(bcd->parent);
-
-	put_device(parent);
-}
-
 static int bsg_put_device(struct bsg_device *bd)
 {
	int ret = 0, do_free;

@@ -694,7 +681,6 @@ static int bsg_put_device(struct bsg_device *bd)
 	kfree(bd);
 out:
-	kref_put(&q->bsg_dev.ref, bsg_kref_release_function);
 	if (do_free)
		blk_put_queue(q);
 	return ret;

@@ -760,8 +746,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
	 */
 	mutex_lock(&bsg_mutex);
 	bcd = idr_find(&bsg_minor_idr, iminor(inode));
-	if (bcd)
-		kref_get(&bcd->ref);
 	mutex_unlock(&bsg_mutex);

 	if (!bcd)

@@ -772,8 +756,6 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
		return bd;

 	bd = bsg_add_device(inode, bcd->queue, file);
-	if (IS_ERR(bd))
-		kref_put(&bcd->ref, bsg_kref_release_function);

 	return bd;
 }

@@ -913,25 +895,17 @@ void bsg_unregister_queue(struct request_queue *q)
 	sysfs_remove_link(&q->kobj, "bsg");
 	device_unregister(bcd->class_dev);
 	bcd->class_dev = NULL;
-	kref_put(&bcd->ref, bsg_kref_release_function);
 	mutex_unlock(&bsg_mutex);
 }
 EXPORT_SYMBOL_GPL(bsg_unregister_queue);

 int bsg_register_queue(struct request_queue *q, struct device *parent,
-		const char *name, const struct bsg_ops *ops,
-		void (*release)(struct device *))
+		const char *name, const struct bsg_ops *ops)
 {
	struct bsg_class_device *bcd;
	dev_t dev;
	int ret;
	struct device *class_dev = NULL;
-	const char *devname;
-
-	if (name)
-		devname = name;
-	else
-		devname = dev_name(parent);

	/*
	 * we need a proper transport to send commands, not a stacked device

@@ -955,15 +929,12 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
 	bcd->minor = ret;
 	bcd->queue = q;
-	bcd->parent = get_device(parent);
-	bcd->release = release;
 	bcd->ops = ops;
-	kref_init(&bcd->ref);
 	dev = MKDEV(bsg_major, bcd->minor);
-	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
+	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name);
 	if (IS_ERR(class_dev)) {
		ret = PTR_ERR(class_dev);
-		goto put_dev;
+		goto idr_remove;
	}
 	bcd->class_dev = class_dev;

@@ -978,8 +949,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
 unregister_class_dev:
 	device_unregister(class_dev);
-put_dev:
-	put_device(parent);
+idr_remove:
 	idr_remove(&bsg_minor_idr, bcd->minor);
 unlock:
 	mutex_unlock(&bsg_mutex);

@@ -993,7 +963,7 @@ int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
		return -EINVAL;
	}

-	return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL);
+	return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops);
 }
 EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
@@ -210,9 +210,9 @@ struct cfqg_stats {
 	/* total time with empty current active q with other requests queued */
 	struct blkg_stat		empty_time;
 	/* fields after this shouldn't be cleared on stat reset */
-	uint64_t			start_group_wait_time;
-	uint64_t			start_idle_time;
-	uint64_t			start_empty_time;
+	u64				start_group_wait_time;
+	u64				start_idle_time;
+	u64				start_empty_time;
 	uint16_t			flags;
 #endif	/* CONFIG_DEBUG_BLK_CGROUP */
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */

@@ -491,13 +491,13 @@ CFQG_FLAG_FNS(empty)
 /* This should be called with the queue_lock held. */
 static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;

	if (!cfqg_stats_waiting(stats))
		return;

-	now = sched_clock();
-	if (time_after64(now, stats->start_group_wait_time))
+	now = ktime_get_ns();
+	if (now > stats->start_group_wait_time)
		blkg_stat_add(&stats->group_wait_time,
			      now - stats->start_group_wait_time);
	cfqg_stats_clear_waiting(stats);

@@ -513,20 +513,20 @@ static void cfqg_stats_set_start_group_wait_time(struct cfq_group *cfqg,
	if (cfqg == curr_cfqg)
		return;
-	stats->start_group_wait_time = sched_clock();
+	stats->start_group_wait_time = ktime_get_ns();
	cfqg_stats_mark_waiting(stats);
 }

 /* This should be called with the queue_lock held. */
 static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;

	if (!cfqg_stats_empty(stats))
		return;

-	now = sched_clock();
-	if (time_after64(now, stats->start_empty_time))
+	now = ktime_get_ns();
+	if (now > stats->start_empty_time)
		blkg_stat_add(&stats->empty_time,
			      now - stats->start_empty_time);
	cfqg_stats_clear_empty(stats);

@@ -552,7 +552,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
 	if (cfqg_stats_empty(stats))
		return;

-	stats->start_empty_time = sched_clock();
+	stats->start_empty_time = ktime_get_ns();
 	cfqg_stats_mark_empty(stats);
 }

@@ -561,9 +561,9 @@ static void cfqg_stats_update_idle_time(struct cfq_group *cfqg)
 	struct cfqg_stats *stats = &cfqg->stats;

 	if (cfqg_stats_idling(stats)) {
-		unsigned long long now = sched_clock();
+		u64 now = ktime_get_ns();

-		if (time_after64(now, stats->start_idle_time))
+		if (now > stats->start_idle_time)
			blkg_stat_add(&stats->idle_time,
				      now - stats->start_idle_time);
		cfqg_stats_clear_idling(stats);

@@ -576,7 +576,7 @@ static void cfqg_stats_set_start_idle_time(struct cfq_group *cfqg)
 	BUG_ON(cfqg_stats_idling(stats));

-	stats->start_idle_time = sched_clock();
+	stats->start_idle_time = ktime_get_ns();
 	cfqg_stats_mark_idling(stats);
 }

@@ -701,17 +701,19 @@ static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time,
-			unsigned int op)
+						u64 start_time_ns,
+						u64 io_start_time_ns,
+						unsigned int op)
 {
	struct cfqg_stats *stats = &cfqg->stats;
-	unsigned long long now = sched_clock();
+	u64 now = ktime_get_ns();

-	if (time_after64(now, io_start_time))
-		blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
-	if (time_after64(io_start_time, start_time))
+	if (now > io_start_time_ns)
+		blkg_rwstat_add(&stats->service_time, op,
+				now - io_start_time_ns);
+	if (io_start_time_ns > start_time_ns)
		blkg_rwstat_add(&stats->wait_time, op,
-				io_start_time - start_time);
+				io_start_time_ns - start_time_ns);
 }

@@ -797,8 +799,9 @@ static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg,
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time,
-			unsigned int op) { }
+						u64 start_time_ns,
+						u64 io_start_time_ns,
+						unsigned int op) { }

 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */

@@ -4225,8 +4228,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 	cfqd->rq_in_driver--;
 	cfqq->dispatched--;
 	(RQ_CFQG(rq))->dispatched--;
-	cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
-				     rq_io_start_time_ns(rq), rq->cmd_flags);
+	cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns,
+				     rq->io_start_time_ns, rq->cmd_flags);

 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;

@@ -4242,16 +4245,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
		st->ttime.last_end_request = now;
-		/*
-		 * We have to do this check in jiffies since start_time is in
-		 * jiffies and it is not trivial to convert to ns. If
-		 * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
-		 * will become problematic but so far we are fine (the default
-		 * is 128 ms).
-		 */
-		if (!time_after(rq->start_time +
-				nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
-				jiffies))
+		if (rq->start_time_ns + cfqd->cfq_fifo_expire[1] <= now)
			cfqd->last_delayed_sync = now;
	}

@@ -4792,7 +4786,7 @@ USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, U
 #undef USEC_STORE_FUNCTION

 #define CFQ_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
+	__ATTR(name, 0644, cfq_##name##_show, cfq_##name##_store)

 static struct elv_fs_entry cfq_attrs[] = {
	CFQ_ATTR(quantum),
@@ -512,8 +512,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
 #undef STORE_FUNCTION

 #define DD_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
-				      deadline_##name##_store)
+	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)

 static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
diff --git a/block/elevator.c b/block/elevator.c

@@ -199,76 +199,46 @@ static void elevator_release(struct kobject *kobj)
 	kfree(e);
 }

-int elevator_init(struct request_queue *q, char *name)
+/*
+ * Use the default elevator specified by config boot param for non-mq devices,
+ * or by config option. Don't try to load modules as we could be running off
+ * async and request_module() isn't allowed from async.
+ */
+int elevator_init(struct request_queue *q)
 {
 	struct elevator_type *e = NULL;
-	int err;
+	int err = 0;

-	/*
-	 * q->sysfs_lock must be held to provide mutual exclusion between
-	 * elevator_switch() and here.
-	 */
-	lockdep_assert_held(&q->sysfs_lock);
-
+	mutex_lock(&q->sysfs_lock);
 	if (unlikely(q->elevator))
-		return 0;
+		goto out_unlock;

 	INIT_LIST_HEAD(&q->queue_head);
 	q->last_merge = NULL;
 	q->end_sector = 0;
 	q->boundary_rq = NULL;

-	if (name) {
-		e = elevator_get(q, name, true);
-		if (!e)
-			return -EINVAL;
-	}
-
-	/*
-	 * Use the default elevator specified by config boot param for
-	 * non-mq devices, or by config option. Don't try to load modules
-	 * as we could be running off async and request_module() isn't
-	 * allowed from async.
-	 */
-	if (!e && !q->mq_ops && *chosen_elevator) {
+	if (*chosen_elevator) {
		e = elevator_get(q, chosen_elevator, false);
		if (!e)
			printk(KERN_ERR "I/O scheduler %s not found\n",
							chosen_elevator);
	}

+	if (!e)
+		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
 	if (!e) {
-		/*
-		 * For blk-mq devices, we default to using mq-deadline,
-		 * if available, for single queue devices. If deadline
-		 * isn't available OR we have multiple queues, default
-		 * to "none".
-		 */
-		if (q->mq_ops) {
-			if (q->nr_hw_queues == 1)
-				e = elevator_get(q, "mq-deadline", false);
-			if (!e)
-				return 0;
-		} else
-			e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
-
-		if (!e) {
-			printk(KERN_ERR
-				"Default I/O scheduler not found. " \
-					"Using noop.\n");
-			e = elevator_get(q, "noop", false);
-		}
+		printk(KERN_ERR
+			"Default I/O scheduler not found. Using noop.\n");
+		e = elevator_get(q, "noop", false);
 	}

-	if (e->uses_mq)
-		err = blk_mq_init_sched(q, e);
-	else
-		err = e->ops.sq.elevator_init_fn(q, e);
+	err = e->ops.sq.elevator_init_fn(q, e);
 	if (err)
		elevator_put(e);
+out_unlock:
+	mutex_unlock(&q->sysfs_lock);
 	return err;
 }
-EXPORT_SYMBOL(elevator_init);

 void elevator_exit(struct request_queue *q, struct elevator_queue *e)
 {

@@ -281,7 +251,6 @@ void elevator_exit(struct request_queue *q, struct elevator_queue *e)
 	kobject_put(&e->kobj);
 }
-EXPORT_SYMBOL(elevator_exit);

 static inline void __elv_rqhash_del(struct request *rq)
 {

@@ -1004,6 +973,40 @@ out:
	return ret;
 }

+/*
+ * For blk-mq devices, we default to using mq-deadline, if available, for single
+ * queue devices. If deadline isn't available OR we have multiple queues,
+ * default to "none".
+ */
+int elevator_init_mq(struct request_queue *q)
+{
+	struct elevator_type *e;
+	int err = 0;
+
+	if (q->nr_hw_queues != 1)
+		return 0;
+
+	/*
+	 * q->sysfs_lock must be held to provide mutual exclusion between
+	 * elevator_switch() and here.
+	 */
+	mutex_lock(&q->sysfs_lock);
+	if (unlikely(q->elevator))
+		goto out_unlock;
+
+	e = elevator_get(q, "mq-deadline", false);
+	if (!e)
+		goto out_unlock;
+
+	err = blk_mq_init_sched(q, e);
+	if (err)
+		elevator_put(e);
+out_unlock:
+	mutex_unlock(&q->sysfs_lock);
+	return err;
+}
+
 /*
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
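elevator_init() and the new elevator_init_mq() above follow the same fallback order: an explicitly requested scheduler, then the build-time default, then a last-resort choice. A small user-space sketch of that selection order (the name table here is illustrative):

#include <stdio.h>
#include <string.h>

static const char *known[] = { "mq-deadline", "kyber", "bfq", "noop", NULL };

/* chosen name first, then the configured default, then a hard fallback */
static const char *pick_scheduler(const char *chosen, const char *config_default)
{
	int i;

	for (i = 0; chosen && chosen[0] && known[i]; i++)
		if (!strcmp(chosen, known[i]))
			return known[i];
	for (i = 0; config_default && known[i]; i++)
		if (!strcmp(config_default, known[i]))
			return known[i];
	return "noop";
}

int main(void)
{
	printf("%s\n", pick_scheduler("", "mq-deadline"));	/* mq-deadline */
	printf("%s\n", pick_scheduler("bogus", "bogus"));	/* noop */
	return 0;
}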
@@ -1139,28 +1139,25 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
 }

-static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
-static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
-static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
-static DEVICE_ATTR(hidden, S_IRUGO, disk_hidden_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
-		   NULL);
-static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
-static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
-		   disk_badblocks_store);
+static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
+static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
+static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
+static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
+static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
-	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
 #endif
 #ifdef CONFIG_FAIL_IO_TIMEOUT
 static struct device_attribute dev_attr_fail_timeout =
-	__ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show,
-	       part_timeout_store);
+	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
 #endif

 static struct attribute *disk_attrs[] = {

@@ -1924,9 +1921,9 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev,
	return count;
 }

-static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
-static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
-static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
+static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
+static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
+static const DEVICE_ATTR(events_poll_msecs, 0644,
			 disk_events_poll_msecs_show,
			 disk_events_poll_msecs_store);
@@ -72,6 +72,19 @@ static const unsigned int kyber_batch_size[] = {
 	[KYBER_OTHER] = 8,
 };

+/*
+ * There is a same mapping between ctx & hctx and kcq & khd,
+ * we use request->mq_ctx->index_hw to index the kcq in khd.
+ */
+struct kyber_ctx_queue {
+	/*
+	 * Used to ensure operations on rq_list and kcq_map to be an atmoic one.
+	 * Also protect the rqs on rq_list when merge.
+	 */
+	spinlock_t lock;
+	struct list_head rq_list[KYBER_NUM_DOMAINS];
+} ____cacheline_aligned_in_smp;
+
 struct kyber_queue_data {
 	struct request_queue *q;

@@ -99,6 +112,8 @@ struct kyber_hctx_data {
 	struct list_head rqs[KYBER_NUM_DOMAINS];
 	unsigned int cur_domain;
 	unsigned int batching;
+	struct kyber_ctx_queue *kcqs;
+	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
 	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
 	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];

@@ -107,10 +122,8 @@ struct kyber_hctx_data {
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
			     void *key);

-static int rq_sched_domain(const struct request *rq)
+static unsigned int kyber_sched_domain(unsigned int op)
 {
-	unsigned int op = rq->cmd_flags;
-
	if ((op & REQ_OP_MASK) == REQ_OP_READ)
		return KYBER_READ;
	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))

@@ -284,6 +297,11 @@ static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
 }

+static int kyber_bucket_fn(const struct request *rq)
+{
+	return kyber_sched_domain(rq->cmd_flags);
+}
+
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 {
	struct kyber_queue_data *kqd;

@@ -297,7 +315,7 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
		goto err;
 	kqd->q = q;

-	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
+	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
					  KYBER_NUM_DOMAINS, kqd);
 	if (!kqd->cb)
		goto err_kqd;

@@ -376,8 +394,18 @@ static void kyber_exit_sched(struct elevator_queue *e)
 	kfree(kqd);
 }

+static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
+{
+	unsigned int i;
+
+	spin_lock_init(&kcq->lock);
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		INIT_LIST_HEAD(&kcq->rq_list[i]);
+}
+
 static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
	struct kyber_hctx_data *khd;
	int i;

@@ -385,6 +413,24 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	if (!khd)
		return -ENOMEM;

+	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
+				       sizeof(struct kyber_ctx_queue),
+				       GFP_KERNEL, hctx->numa_node);
+	if (!khd->kcqs)
+		goto err_khd;
+
+	for (i = 0; i < hctx->nr_ctx; i++)
+		kyber_ctx_queue_init(&khd->kcqs[i]);
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
+				      ilog2(8), GFP_KERNEL, hctx->numa_node)) {
+			while (--i >= 0)
+				sbitmap_free(&khd->kcq_map[i]);
+			goto err_kcqs;
+		}
+	}
+
 	spin_lock_init(&khd->lock);

 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {

@@ -400,12 +446,26 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	khd->batching = 0;

 	hctx->sched_data = khd;
+	sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags,
+					kqd->async_depth);

 	return 0;
+
+err_kcqs:
+	kfree(khd->kcqs);
+err_khd:
+	kfree(khd);
+	return -ENOMEM;
 }

 static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	int i;
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		sbitmap_free(&khd->kcq_map[i]);
+	kfree(khd->kcqs);
 	kfree(hctx->sched_data);
 }

@@ -427,7 +487,7 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
 	nr = rq_get_domain_token(rq);
 	if (nr != -1) {
-		sched_domain = rq_sched_domain(rq);
+		sched_domain = kyber_sched_domain(rq->cmd_flags);
		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
				    rq->mq_ctx->cpu);
	}

@@ -446,11 +506,51 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
	}
 }

+static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
+	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw];
+	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
+	struct list_head *rq_list = &kcq->rq_list[sched_domain];
+	bool merged;
+
+	spin_lock(&kcq->lock);
+	merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio);
+	spin_unlock(&kcq->lock);
+	blk_mq_put_ctx(ctx);
+
+	return merged;
+}
+
 static void kyber_prepare_request(struct request *rq, struct bio *bio)
 {
	rq_set_domain_token(rq, -1);
 }

+static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
+				  struct list_head *rq_list, bool at_head)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct request *rq, *next;
+
+	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
+		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
+		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw];
+		struct list_head *head = &kcq->rq_list[sched_domain];
+
+		spin_lock(&kcq->lock);
+		if (at_head)
+			list_move(&rq->queuelist, head);
+		else
+			list_move_tail(&rq->queuelist, head);
+		sbitmap_set_bit(&khd->kcq_map[sched_domain],
+				rq->mq_ctx->index_hw);
+		blk_mq_sched_request_inserted(rq);
+		spin_unlock(&kcq->lock);
+	}
+}
+
 static void kyber_finish_request(struct request *rq)
 {
	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;

@@ -469,7 +569,7 @@ static void kyber_completed_request(struct request *rq)
	 * Check if this request met our latency goal. If not, quickly gather
	 * some statistics and start throttling.
	 */
-	sched_domain = rq_sched_domain(rq);
+	sched_domain = kyber_sched_domain(rq->cmd_flags);
	switch (sched_domain) {
	case KYBER_READ:
		target = kqd->read_lat_nsec;

@@ -485,29 +585,48 @@ static void kyber_completed_request(struct request *rq)
 	if (blk_stat_is_active(kqd->cb))
		return;

-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq->issue_stat))
+	now = ktime_get_ns();
+	if (now < rq->io_start_time_ns)
		return;

-	latency = now - blk_stat_time(&rq->issue_stat);
+	latency = now - rq->io_start_time_ns;

 	if (latency > target)
		blk_stat_activate_msecs(kqd->cb, 10);
 }

-static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
-				  struct blk_mq_hw_ctx *hctx)
+struct flush_kcq_data {
+	struct kyber_hctx_data *khd;
+	unsigned int sched_domain;
+	struct list_head *list;
+};
+
+static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
 {
-	LIST_HEAD(rq_list);
-	struct request *rq, *next;
+	struct flush_kcq_data *flush_data = data;
+	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];

-	blk_mq_flush_busy_ctxs(hctx, &rq_list);
-	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-		unsigned int sched_domain;
+	spin_lock(&kcq->lock);
+	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
+			      flush_data->list);
+	sbitmap_clear_bit(sb, bitnr);
+	spin_unlock(&kcq->lock);

-		sched_domain = rq_sched_domain(rq);
-		list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
-	}
+	return true;
+}
+
+static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
+				  unsigned int sched_domain,
+				  struct list_head *list)
+{
+	struct flush_kcq_data data = {
+		.khd = khd,
+		.sched_domain = sched_domain,
+		.list = list,
+	};
+
+	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
+			     flush_busy_kcq, &data);
 }

 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,

@@ -570,26 +689,23 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
 static struct request *
 kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
			  struct kyber_hctx_data *khd,
-			  struct blk_mq_hw_ctx *hctx,
-			  bool *flushed)
+			  struct blk_mq_hw_ctx *hctx)
 {
	struct list_head *rqs;
	struct request *rq;
	int nr;

	rqs = &khd->rqs[khd->cur_domain];
	rq = list_first_entry_or_null(rqs, struct request, queuelist);

	/*
-	 * If there wasn't already a pending request and we haven't flushed the
-	 * software queues yet, flush the software queues and check again.
+	 * If we already have a flushed request, then we just need to get a
+	 * token for it. Otherwise, if there are pending requests in the kcqs,
+	 * flush the kcqs, but only if we can get a token. If not, we should
+	 * leave the requests in the kcqs so that they can be merged. Note that
+	 * khd->lock serializes the flushes, so if we observed any bit set in
|
||||
* the kcq_map, we will always get a request.
|
||||
*/
|
||||
if (!rq && !*flushed) {
|
||||
kyber_flush_busy_ctxs(khd, hctx);
|
||||
*flushed = true;
|
||||
rq = list_first_entry_or_null(rqs, struct request, queuelist);
|
||||
}
|
||||
|
||||
rq = list_first_entry_or_null(rqs, struct request, queuelist);
|
||||
if (rq) {
|
||||
nr = kyber_get_domain_token(kqd, khd, hctx);
|
||||
if (nr >= 0) {
|
||||
|
@ -598,6 +714,16 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
|
|||
list_del_init(&rq->queuelist);
|
||||
return rq;
|
||||
}
|
||||
} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
|
||||
nr = kyber_get_domain_token(kqd, khd, hctx);
|
||||
if (nr >= 0) {
|
||||
kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
|
||||
rq = list_first_entry(rqs, struct request, queuelist);
|
||||
khd->batching++;
|
||||
rq_set_domain_token(rq, nr);
|
||||
list_del_init(&rq->queuelist);
|
||||
return rq;
|
||||
}
|
||||
}
|
||||
|
||||
/* There were either no pending requests or no tokens. */
|
||||
|
@ -608,7 +734,6 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
|||
{
|
||||
struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
|
||||
struct kyber_hctx_data *khd = hctx->sched_data;
|
||||
bool flushed = false;
|
||||
struct request *rq;
|
||||
int i;
|
||||
|
||||
|
@ -619,7 +744,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
|||
* from the batch.
|
||||
*/
|
||||
if (khd->batching < kyber_batch_size[khd->cur_domain]) {
|
||||
rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
|
||||
rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
|
||||
if (rq)
|
||||
goto out;
|
||||
}
|
||||
|
@ -640,7 +765,7 @@ static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
|
|||
else
|
||||
khd->cur_domain++;
|
||||
|
||||
rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
|
||||
rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
|
||||
if (rq)
|
||||
goto out;
|
||||
}
|
||||
|
@ -657,10 +782,12 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
|
||||
if (!list_empty_careful(&khd->rqs[i]))
|
||||
if (!list_empty_careful(&khd->rqs[i]) ||
|
||||
sbitmap_any_bit_set(&khd->kcq_map[i]))
|
||||
return true;
|
||||
}
|
||||
return sbitmap_any_bit_set(&hctx->ctx_map);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#define KYBER_LAT_SHOW_STORE(op) \
|
||||
|
@ -831,7 +958,9 @@ static struct elevator_type kyber_sched = {
|
|||
.init_hctx = kyber_init_hctx,
|
||||
.exit_hctx = kyber_exit_hctx,
|
||||
.limit_depth = kyber_limit_depth,
|
||||
.bio_merge = kyber_bio_merge,
|
||||
.prepare_request = kyber_prepare_request,
|
||||
.insert_requests = kyber_insert_requests,
|
||||
.finish_request = kyber_finish_request,
|
||||
.requeue_request = kyber_finish_request,
|
||||
.completed_request = kyber_completed_request,
|
||||
|
|
|
@ -630,8 +630,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
|
|||
#undef STORE_FUNCTION
|
||||
|
||||
#define DD_ATTR(name) \
|
||||
__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
|
||||
deadline_##name##_store)
|
||||
__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
|
||||
|
||||
static struct elv_fs_entry deadline_attrs[] = {
|
||||
DD_ATTR(read_expire),
|
||||
|
|
|
@ -179,18 +179,17 @@ ssize_t part_fail_store(struct device *dev,
|
|||
}
|
||||
#endif
|
||||
|
||||
static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
|
||||
static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
|
||||
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
|
||||
static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
|
||||
static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
|
||||
static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
|
||||
NULL);
|
||||
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
|
||||
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
|
||||
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
|
||||
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
|
||||
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
|
||||
static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
|
||||
static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
|
||||
static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
|
||||
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
|
||||
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
|
||||
#ifdef CONFIG_FAIL_MAKE_REQUEST
|
||||
static struct device_attribute dev_attr_fail =
|
||||
__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
|
||||
__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
|
||||
#endif
|
||||
|
||||
static struct attribute *part_attrs[] = {
|
||||
|
@ -291,8 +290,7 @@ static ssize_t whole_disk_show(struct device *dev,
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
|
||||
whole_disk_show, NULL);
|
||||
static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
|
||||
|
||||
/*
|
||||
* Must be called either with bd_mutex held, before a disk can be opened or
|
||||
|
@ -518,7 +516,7 @@ rescan:
|
|||
|
||||
if (disk->fops->revalidate_disk)
|
||||
disk->fops->revalidate_disk(disk);
|
||||
check_disk_size_change(disk, bdev);
|
||||
check_disk_size_change(disk, bdev, true);
|
||||
bdev->bd_invalidated = 0;
|
||||
if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
|
||||
return 0;
|
||||
|
@ -643,7 +641,7 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
|
|||
return res;
|
||||
|
||||
set_capacity(disk, 0);
|
||||
check_disk_size_change(disk, bdev);
|
||||
check_disk_size_change(disk, bdev, false);
|
||||
bdev->bd_invalidated = 0;
|
||||
/* tell userspace that the media / partition table may have changed */
|
||||
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
|
||||
|
|
|
@ -321,8 +321,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
|
|||
at_head = 1;
|
||||
|
||||
ret = -ENOMEM;
|
||||
rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
|
||||
GFP_KERNEL);
|
||||
rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
|
||||
if (IS_ERR(rq))
|
||||
return PTR_ERR(rq);
|
||||
req = scsi_req(rq);
|
||||
|
@ -449,8 +448,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
|
|||
|
||||
}
|
||||
|
||||
rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
|
||||
__GFP_RECLAIM);
|
||||
rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto error_free_buffer;
|
||||
|
@ -501,7 +499,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
|
|||
break;
|
||||
}
|
||||
|
||||
if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) {
|
||||
if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO)) {
|
||||
err = DRIVER_ERROR << 24;
|
||||
goto error;
|
||||
}
|
||||
|
@ -538,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
|
|||
struct request *rq;
|
||||
int err;
|
||||
|
||||
rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
|
||||
rq = blk_get_request(q, REQ_OP_SCSI_OUT, 0);
|
||||
if (IS_ERR(rq))
|
||||
return PTR_ERR(rq);
|
||||
rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
|
||||
|
|
|
@ -500,57 +500,6 @@ void ata_eh_release(struct ata_port *ap)
|
|||
mutex_unlock(&ap->host->eh_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* ata_scsi_timed_out - SCSI layer time out callback
|
||||
* @cmd: timed out SCSI command
|
||||
*
|
||||
* Handles SCSI layer timeout. We race with normal completion of
|
||||
* the qc for @cmd. If the qc is already gone, we lose and let
|
||||
* the scsi command finish (EH_HANDLED). Otherwise, the qc has
|
||||
* timed out and EH should be invoked. Prevent ata_qc_complete()
|
||||
* from finishing it by setting EH_SCHEDULED and return
|
||||
* EH_NOT_HANDLED.
|
||||
*
|
||||
* TODO: kill this function once old EH is gone.
|
||||
*
|
||||
* LOCKING:
|
||||
* Called from timer context
|
||||
*
|
||||
* RETURNS:
|
||||
* EH_HANDLED or EH_NOT_HANDLED
|
||||
*/
|
||||
enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
|
||||
{
|
||||
struct Scsi_Host *host = cmd->device->host;
|
||||
struct ata_port *ap = ata_shost_to_port(host);
|
||||
unsigned long flags;
|
||||
struct ata_queued_cmd *qc;
|
||||
enum blk_eh_timer_return ret;
|
||||
|
||||
DPRINTK("ENTER\n");
|
||||
|
||||
if (ap->ops->error_handler) {
|
||||
ret = BLK_EH_NOT_HANDLED;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = BLK_EH_HANDLED;
|
||||
spin_lock_irqsave(ap->lock, flags);
|
||||
qc = ata_qc_from_tag(ap, ap->link.active_tag);
|
||||
if (qc) {
|
||||
WARN_ON(qc->scsicmd != cmd);
|
||||
qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
|
||||
qc->err_mask |= AC_ERR_TIMEOUT;
|
||||
ret = BLK_EH_NOT_HANDLED;
|
||||
}
|
||||
spin_unlock_irqrestore(ap->lock, flags);
|
||||
|
||||
out:
|
||||
DPRINTK("EXIT, ret=%d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(ata_scsi_timed_out);
|
||||
|
||||
static void ata_eh_unload(struct ata_port *ap)
|
||||
{
|
||||
struct ata_link *link;
|
||||
|
|
|
@ -1179,7 +1179,6 @@ static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T
|
|||
|
||||
if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
|
||||
return DAC960_Failure(Controller, "DMA mask out of range");
|
||||
Controller->BounceBufferLimit = DMA_BIT_MASK(32);
|
||||
|
||||
if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) {
|
||||
CommandMailboxesSize = 0;
|
||||
|
@ -1380,11 +1379,8 @@ static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T
|
|||
dma_addr_t CommandMailboxDMA;
|
||||
DAC960_V2_CommandStatus_T CommandStatus;
|
||||
|
||||
if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)))
|
||||
Controller->BounceBufferLimit = DMA_BIT_MASK(64);
|
||||
else if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
|
||||
Controller->BounceBufferLimit = DMA_BIT_MASK(32);
|
||||
else
|
||||
if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) &&
|
||||
pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
|
||||
return DAC960_Failure(Controller, "DMA mask out of range");
|
||||
|
||||
/* This is a temporary dma mapping, used only in the scope of this function */
|
||||
|
@ -2540,7 +2536,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
|
|||
continue;
|
||||
}
|
||||
Controller->RequestQueue[n] = RequestQueue;
|
||||
blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
|
||||
RequestQueue->queuedata = Controller;
|
||||
blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
|
||||
blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
|
||||
|
@ -6594,7 +6589,7 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
|
|||
DAC960_ProcDirectoryEntry);
|
||||
proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
|
||||
proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
|
||||
proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
|
||||
proc_create_data("user_command", 0600, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
|
||||
Controller->ControllerProcEntry = ControllerProcEntry;
|
||||
}
|
||||
|
||||
|
|
|
@ -2295,7 +2295,6 @@ typedef struct DAC960_Controller
|
|||
unsigned short MaxBlocksPerCommand;
|
||||
unsigned short ControllerScatterGatherLimit;
|
||||
unsigned short DriverScatterGatherLimit;
|
||||
u64 BounceBufferLimit;
|
||||
unsigned int CombinedStatusBufferLength;
|
||||
unsigned int InitialStatusLength;
|
||||
unsigned int CurrentStatusLength;
|
||||
|
|
|
@ -159,14 +159,14 @@ static int aoe_debugfs_open(struct inode *inode, struct file *file)
|
|||
return single_open(file, aoedisk_debugfs_show, inode->i_private);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
|
||||
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
|
||||
static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
|
||||
static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
|
||||
static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
|
||||
static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
|
||||
static struct device_attribute dev_attr_firmware_version = {
|
||||
.attr = { .name = "firmware-version", .mode = S_IRUGO },
|
||||
.attr = { .name = "firmware-version", .mode = 0444 },
|
||||
.show = aoedisk_show_fwver,
|
||||
};
|
||||
static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
|
||||
static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);
|
||||
|
||||
static struct attribute *aoe_attrs[] = {
|
||||
&dev_attr_state.attr,
|
||||
|
@ -388,7 +388,6 @@ aoeblk_gdalloc(void *vp)
|
|||
d->aoemajor, d->aoeminor);
|
||||
goto err_mempool;
|
||||
}
|
||||
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
|
||||
|
||||
spin_lock_irqsave(&d->lock, flags);
|
||||
WARN_ON(!(d->flags & DEVFL_GD_NOW));
|
||||
|
|
|
@ -1032,8 +1032,9 @@ bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
|
|||
iter.bi_size = cnt;
|
||||
|
||||
__bio_for_each_segment(bv, bio, iter, iter) {
|
||||
char *p = page_address(bv.bv_page) + bv.bv_offset;
|
||||
char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
|
||||
skb_copy_bits(skb, soff, p, bv.bv_len);
|
||||
kunmap_atomic(p);
|
||||
soff += bv.bv_len;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -331,15 +331,15 @@ static const struct block_device_operations brd_fops = {
|
|||
* And now the modules code and kernel interface.
|
||||
*/
|
||||
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
|
||||
module_param(rd_nr, int, S_IRUGO);
|
||||
module_param(rd_nr, int, 0444);
|
||||
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
|
||||
|
||||
unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
|
||||
module_param(rd_size, ulong, S_IRUGO);
|
||||
module_param(rd_size, ulong, 0444);
|
||||
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
|
||||
|
||||
static int max_part = 1;
|
||||
module_param(max_part, int, S_IRUGO);
|
||||
module_param(max_part, int, 0444);
|
||||
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -402,6 +402,10 @@ static struct brd_device *brd_alloc(int i)
|
|||
set_capacity(disk, rd_size * 2);
|
||||
disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
|
||||
|
||||
/* Tell the block layer that this is not a rotational device */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
|
||||
|
||||
return brd;
|
||||
|
||||
out_free_queue:
|
||||
|
|
|
@ -977,7 +977,7 @@ static void drbd_bm_endio(struct bio *bio)
|
|||
bm_page_unlock_io(device, idx);
|
||||
|
||||
if (ctx->flags & BM_AIO_COPY_PAGES)
|
||||
mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
|
||||
mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);
|
||||
|
||||
bio_put(bio);
|
||||
|
||||
|
@ -1014,7 +1014,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
|
|||
bm_set_page_unchanged(b->bm_pages[page_nr]);
|
||||
|
||||
if (ctx->flags & BM_AIO_COPY_PAGES) {
|
||||
page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM);
|
||||
page = mempool_alloc(&drbd_md_io_page_pool,
|
||||
GFP_NOIO | __GFP_HIGHMEM);
|
||||
copy_highpage(page, b->bm_pages[page_nr]);
|
||||
bm_store_page_idx(page, page_nr);
|
||||
} else
|
||||
|
|
|
@ -481,9 +481,9 @@ void drbd_debugfs_resource_add(struct drbd_resource *resource)
|
|||
goto fail;
|
||||
resource->debugfs_res_connections = dentry;
|
||||
|
||||
dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
|
||||
resource->debugfs_res, resource,
|
||||
&in_flight_summary_fops);
|
||||
dentry = debugfs_create_file("in_flight_summary", 0440,
|
||||
resource->debugfs_res, resource,
|
||||
&in_flight_summary_fops);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
resource->debugfs_res_in_flight_summary = dentry;
|
||||
|
@ -645,16 +645,16 @@ void drbd_debugfs_connection_add(struct drbd_connection *connection)
|
|||
goto fail;
|
||||
connection->debugfs_conn = dentry;
|
||||
|
||||
dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
|
||||
connection->debugfs_conn, connection,
|
||||
&connection_callback_history_fops);
|
||||
dentry = debugfs_create_file("callback_history", 0440,
|
||||
connection->debugfs_conn, connection,
|
||||
&connection_callback_history_fops);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
connection->debugfs_conn_callback_history = dentry;
|
||||
|
||||
dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
|
||||
connection->debugfs_conn, connection,
|
||||
&connection_oldest_requests_fops);
|
||||
dentry = debugfs_create_file("oldest_requests", 0440,
|
||||
connection->debugfs_conn, connection,
|
||||
&connection_oldest_requests_fops);
|
||||
if (IS_ERR_OR_NULL(dentry))
|
||||
goto fail;
|
||||
connection->debugfs_conn_oldest_requests = dentry;
|
||||
|
@ -824,7 +824,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
|
|||
device->debugfs_minor = dentry;
|
||||
|
||||
#define DCF(name) do { \
|
||||
dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \
|
||||
dentry = debugfs_create_file(#name, 0440, \
|
||||
device->debugfs_vol, device, \
|
||||
&device_ ## name ## _fops); \
|
||||
if (IS_ERR_OR_NULL(dentry)) \
|
||||
|
|
|
@ -1405,8 +1405,8 @@ extern struct kmem_cache *drbd_request_cache;
|
|||
extern struct kmem_cache *drbd_ee_cache; /* peer requests */
|
||||
extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
|
||||
extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
|
||||
extern mempool_t *drbd_request_mempool;
|
||||
extern mempool_t *drbd_ee_mempool;
|
||||
extern mempool_t drbd_request_mempool;
|
||||
extern mempool_t drbd_ee_mempool;
|
||||
|
||||
/* drbd's page pool, used to buffer data received from the peer,
|
||||
* or data requested by the peer.
|
||||
|
@ -1432,16 +1432,16 @@ extern wait_queue_head_t drbd_pp_wait;
|
|||
* 128 should be plenty, currently we probably can get away with as few as 1.
|
||||
*/
|
||||
#define DRBD_MIN_POOL_PAGES 128
|
||||
extern mempool_t *drbd_md_io_page_pool;
|
||||
extern mempool_t drbd_md_io_page_pool;
|
||||
|
||||
/* We also need to make sure we get a bio
|
||||
* when we need it for housekeeping purposes */
|
||||
extern struct bio_set *drbd_md_io_bio_set;
|
||||
extern struct bio_set drbd_md_io_bio_set;
|
||||
/* to allocate from that set */
|
||||
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
|
||||
|
||||
/* And a bio_set for cloning */
|
||||
extern struct bio_set *drbd_io_bio_set;
|
||||
extern struct bio_set drbd_io_bio_set;
|
||||
|
||||
extern struct mutex resources_mutex;
|
||||
|
||||
|
|
|
@ -124,11 +124,11 @@ struct kmem_cache *drbd_request_cache;
|
|||
struct kmem_cache *drbd_ee_cache; /* peer requests */
|
||||
struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
|
||||
struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
|
||||
mempool_t *drbd_request_mempool;
|
||||
mempool_t *drbd_ee_mempool;
|
||||
mempool_t *drbd_md_io_page_pool;
|
||||
struct bio_set *drbd_md_io_bio_set;
|
||||
struct bio_set *drbd_io_bio_set;
|
||||
mempool_t drbd_request_mempool;
|
||||
mempool_t drbd_ee_mempool;
|
||||
mempool_t drbd_md_io_page_pool;
|
||||
struct bio_set drbd_md_io_bio_set;
|
||||
struct bio_set drbd_io_bio_set;
|
||||
|
||||
/* I do not use a standard mempool, because:
|
||||
1) I want to hand out the pre-allocated objects first.
|
||||
|
@ -153,10 +153,10 @@ struct bio *bio_alloc_drbd(gfp_t gfp_mask)
|
|||
{
|
||||
struct bio *bio;
|
||||
|
||||
if (!drbd_md_io_bio_set)
|
||||
if (!bioset_initialized(&drbd_md_io_bio_set))
|
||||
return bio_alloc(gfp_mask, 1);
|
||||
|
||||
bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
|
||||
bio = bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set);
|
||||
if (!bio)
|
||||
return NULL;
|
||||
return bio;
|
||||
|
@ -2097,16 +2097,11 @@ static void drbd_destroy_mempools(void)
|
|||
|
||||
/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
|
||||
|
||||
if (drbd_io_bio_set)
|
||||
bioset_free(drbd_io_bio_set);
|
||||
if (drbd_md_io_bio_set)
|
||||
bioset_free(drbd_md_io_bio_set);
|
||||
if (drbd_md_io_page_pool)
|
||||
mempool_destroy(drbd_md_io_page_pool);
|
||||
if (drbd_ee_mempool)
|
||||
mempool_destroy(drbd_ee_mempool);
|
||||
if (drbd_request_mempool)
|
||||
mempool_destroy(drbd_request_mempool);
|
||||
bioset_exit(&drbd_io_bio_set);
|
||||
bioset_exit(&drbd_md_io_bio_set);
|
||||
mempool_exit(&drbd_md_io_page_pool);
|
||||
mempool_exit(&drbd_ee_mempool);
|
||||
mempool_exit(&drbd_request_mempool);
|
||||
if (drbd_ee_cache)
|
||||
kmem_cache_destroy(drbd_ee_cache);
|
||||
if (drbd_request_cache)
|
||||
|
@ -2116,11 +2111,6 @@ static void drbd_destroy_mempools(void)
|
|||
if (drbd_al_ext_cache)
|
||||
kmem_cache_destroy(drbd_al_ext_cache);
|
||||
|
||||
drbd_io_bio_set = NULL;
|
||||
drbd_md_io_bio_set = NULL;
|
||||
drbd_md_io_page_pool = NULL;
|
||||
drbd_ee_mempool = NULL;
|
||||
drbd_request_mempool = NULL;
|
||||
drbd_ee_cache = NULL;
|
||||
drbd_request_cache = NULL;
|
||||
drbd_bm_ext_cache = NULL;
|
||||
|
@ -2133,18 +2123,7 @@ static int drbd_create_mempools(void)
|
|||
{
|
||||
struct page *page;
|
||||
const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
|
||||
int i;
|
||||
|
||||
/* prepare our caches and mempools */
|
||||
drbd_request_mempool = NULL;
|
||||
drbd_ee_cache = NULL;
|
||||
drbd_request_cache = NULL;
|
||||
drbd_bm_ext_cache = NULL;
|
||||
drbd_al_ext_cache = NULL;
|
||||
drbd_pp_pool = NULL;
|
||||
drbd_md_io_page_pool = NULL;
|
||||
drbd_md_io_bio_set = NULL;
|
||||
drbd_io_bio_set = NULL;
|
||||
int i, ret;
|
||||
|
||||
/* caches */
|
||||
drbd_request_cache = kmem_cache_create(
|
||||
|
@ -2168,26 +2147,26 @@ static int drbd_create_mempools(void)
|
|||
goto Enomem;
|
||||
|
||||
/* mempools */
|
||||
drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
|
||||
if (drbd_io_bio_set == NULL)
|
||||
ret = bioset_init(&drbd_io_bio_set, BIO_POOL_SIZE, 0, 0);
|
||||
if (ret)
|
||||
goto Enomem;
|
||||
|
||||
drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
|
||||
BIOSET_NEED_BVECS);
|
||||
if (drbd_md_io_bio_set == NULL)
|
||||
ret = bioset_init(&drbd_md_io_bio_set, DRBD_MIN_POOL_PAGES, 0,
|
||||
BIOSET_NEED_BVECS);
|
||||
if (ret)
|
||||
goto Enomem;
|
||||
|
||||
drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
|
||||
if (drbd_md_io_page_pool == NULL)
|
||||
ret = mempool_init_page_pool(&drbd_md_io_page_pool, DRBD_MIN_POOL_PAGES, 0);
|
||||
if (ret)
|
||||
goto Enomem;
|
||||
|
||||
drbd_request_mempool = mempool_create_slab_pool(number,
|
||||
drbd_request_cache);
|
||||
if (drbd_request_mempool == NULL)
|
||||
ret = mempool_init_slab_pool(&drbd_request_mempool, number,
|
||||
drbd_request_cache);
|
||||
if (ret)
|
||||
goto Enomem;
|
||||
|
||||
drbd_ee_mempool = mempool_create_slab_pool(number, drbd_ee_cache);
|
||||
if (drbd_ee_mempool == NULL)
|
||||
ret = mempool_init_slab_pool(&drbd_ee_mempool, number, drbd_ee_cache);
|
||||
if (ret)
|
||||
goto Enomem;
|
||||
|
||||
/* drbd's page pool */
|
||||
|
@ -3010,7 +2989,7 @@ static int __init drbd_init(void)
|
|||
goto fail;
|
||||
|
||||
err = -ENOMEM;
|
||||
drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
|
||||
drbd_proc = proc_create_data("drbd", S_IFREG | 0444 , NULL, &drbd_proc_fops, NULL);
|
||||
if (!drbd_proc) {
|
||||
pr_err("unable to register proc file\n");
|
||||
goto fail;
|
||||
|
|
|
@ -378,7 +378,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
|
|||
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
|
||||
return NULL;
|
||||
|
||||
peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
|
||||
peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
|
||||
if (!peer_req) {
|
||||
if (!(gfp_mask & __GFP_NOWARN))
|
||||
drbd_err(device, "%s: allocation failed\n", __func__);
|
||||
|
@ -409,7 +409,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
|
|||
return peer_req;
|
||||
|
||||
fail:
|
||||
mempool_free(peer_req, drbd_ee_mempool);
|
||||
mempool_free(peer_req, &drbd_ee_mempool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -426,7 +426,7 @@ void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *
|
|||
peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
|
||||
drbd_al_complete_io(device, &peer_req->i);
|
||||
}
|
||||
mempool_free(peer_req, drbd_ee_mempool);
|
||||
mempool_free(peer_req, &drbd_ee_mempool);
|
||||
}
|
||||
|
||||
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
|
||||
|
|
|
@ -55,7 +55,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
|
|||
{
|
||||
struct drbd_request *req;
|
||||
|
||||
req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
|
||||
req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
|
||||
if (!req)
|
||||
return NULL;
|
||||
memset(req, 0, sizeof(*req));
|
||||
|
@ -184,7 +184,7 @@ void drbd_req_destroy(struct kref *kref)
|
|||
}
|
||||
}
|
||||
|
||||
mempool_free(req, drbd_request_mempool);
|
||||
mempool_free(req, &drbd_request_mempool);
|
||||
}
|
||||
|
||||
static void wake_all_senders(struct drbd_connection *connection)
|
||||
|
|
|
@ -269,7 +269,7 @@ enum drbd_req_state_bits {
|
|||
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
|
||||
{
|
||||
struct bio *bio;
|
||||
bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
|
||||
bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
|
||||
|
||||
req->private_bio = bio;
|
||||
|
||||
|
|
|
@ -4450,7 +4450,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
|
|||
return sprintf(buf, "%X\n", UDP->cmos);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
|
||||
static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL);
|
||||
|
||||
static struct attribute *floppy_dev_attrs[] = {
|
||||
&dev_attr_cmos.attr,
|
||||
|
|
|
@ -732,7 +732,7 @@ static ssize_t loop_attr_do_show_##_name(struct device *d, \
|
|||
return loop_attr_show(d, b, loop_attr_##_name##_show); \
|
||||
} \
|
||||
static struct device_attribute loop_attr_##_name = \
|
||||
__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
|
||||
__ATTR(_name, 0444, loop_attr_do_show_##_name, NULL);
|
||||
|
||||
static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
|
||||
{
|
||||
|
@ -809,16 +809,17 @@ static struct attribute_group loop_attribute_group = {
|
|||
.attrs= loop_attrs,
|
||||
};
|
||||
|
||||
static int loop_sysfs_init(struct loop_device *lo)
|
||||
static void loop_sysfs_init(struct loop_device *lo)
|
||||
{
|
||||
return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
|
||||
&loop_attribute_group);
|
||||
lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
|
||||
&loop_attribute_group);
|
||||
}
|
||||
|
||||
static void loop_sysfs_exit(struct loop_device *lo)
|
||||
{
|
||||
sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
|
||||
&loop_attribute_group);
|
||||
if (lo->sysfs_inited)
|
||||
sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
|
||||
&loop_attribute_group);
|
||||
}
|
||||
|
||||
static void loop_config_discard(struct loop_device *lo)
|
||||
|
@ -1677,9 +1678,9 @@ static const struct block_device_operations lo_fops = {
|
|||
* And now the modules code and kernel interface.
|
||||
*/
|
||||
static int max_loop;
|
||||
module_param(max_loop, int, S_IRUGO);
|
||||
module_param(max_loop, int, 0444);
|
||||
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
|
||||
module_param(max_part, int, S_IRUGO);
|
||||
module_param(max_part, int, 0444);
|
||||
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
|
||||
|
|
|
@ -58,6 +58,7 @@ struct loop_device {
|
|||
struct kthread_worker worker;
|
||||
struct task_struct *worker_task;
|
||||
bool use_dio;
|
||||
bool sysfs_inited;
|
||||
|
||||
struct request_queue *lo_queue;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
|
|
|
@ -2285,7 +2285,7 @@ static ssize_t mtip_hw_show_status(struct device *dev,
|
|||
return size;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
|
||||
static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL);
|
||||
|
||||
/* debugsfs entries */
|
||||
|
||||
|
@ -2566,10 +2566,9 @@ static int mtip_hw_debugfs_init(struct driver_data *dd)
|
|||
return -1;
|
||||
}
|
||||
|
||||
debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
|
||||
&mtip_flags_fops);
|
||||
debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
|
||||
&mtip_regs_fops);
|
||||
debugfs_create_file("flags", 0444, dd->dfs_node, dd, &mtip_flags_fops);
|
||||
debugfs_create_file("registers", 0444, dd->dfs_node, dd,
|
||||
&mtip_regs_fops);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2726,15 +2725,11 @@ static void mtip_softirq_done_fn(struct request *rq)
|
|||
blk_mq_end_request(rq, cmd->status);
|
||||
}
|
||||
|
||||
static void mtip_abort_cmd(struct request *req, void *data,
|
||||
bool reserved)
|
||||
static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
|
||||
{
|
||||
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
|
||||
struct driver_data *dd = data;
|
||||
|
||||
if (!blk_mq_request_started(req))
|
||||
return;
|
||||
|
||||
dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
|
||||
|
||||
clear_bit(req->tag, dd->port->cmds_to_issue);
|
||||
|
@ -2742,14 +2737,10 @@ static void mtip_abort_cmd(struct request *req, void *data,
|
|||
mtip_softirq_done_fn(req);
|
||||
}
|
||||
|
||||
static void mtip_queue_cmd(struct request *req, void *data,
|
||||
bool reserved)
|
||||
static void mtip_queue_cmd(struct request *req, void *data, bool reserved)
|
||||
{
|
||||
struct driver_data *dd = data;
|
||||
|
||||
if (!blk_mq_request_started(req))
|
||||
return;
|
||||
|
||||
set_bit(req->tag, dd->port->cmds_to_issue);
|
||||
blk_abort_request(req);
|
||||
}
|
||||
|
@ -3720,7 +3711,8 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
|
|||
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
|
||||
|
||||
cmd->status = BLK_STS_TIMEOUT;
|
||||
return BLK_EH_HANDLED;
|
||||
blk_mq_complete_request(req);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
if (test_bit(req->tag, dd->port->cmds_to_issue))
|
||||
|
@ -3862,7 +3854,6 @@ skip_create_disk:
|
|||
blk_queue_max_hw_sectors(dd->queue, 0xffff);
|
||||
blk_queue_max_segment_size(dd->queue, 0x400000);
|
||||
blk_queue_io_min(dd->queue, 4096);
|
||||
blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask);
|
||||
|
||||
/* Signal trim support */
|
||||
if (dd->trim_supp == true) {
|
||||
|
@ -4273,7 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
|
|||
if (!dd->isr_workq) {
|
||||
dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
|
||||
rv = -ENOMEM;
|
||||
goto block_initialize_err;
|
||||
goto setmask_err;
|
||||
}
|
||||
|
||||
memset(cpu_list, 0, sizeof(cpu_list));
|
||||
|
@ -4614,7 +4605,7 @@ static int __init mtip_init(void)
|
|||
}
|
||||
if (dfs_parent) {
|
||||
dfs_device_status = debugfs_create_file("device_status",
|
||||
S_IRUGO, dfs_parent, NULL,
|
||||
0444, dfs_parent, NULL,
|
||||
&mtip_device_status_fops);
|
||||
if (IS_ERR_OR_NULL(dfs_device_status)) {
|
||||
pr_err("Error creating device_status node\n");
|
||||
|
|
|
@ -166,16 +166,19 @@ static ssize_t pid_show(struct device *dev,
|
|||
}
|
||||
|
||||
static const struct device_attribute pid_attr = {
|
||||
.attr = { .name = "pid", .mode = S_IRUGO},
|
||||
.attr = { .name = "pid", .mode = 0444},
|
||||
.show = pid_show,
|
||||
};
|
||||
|
||||
static void nbd_dev_remove(struct nbd_device *nbd)
|
||||
{
|
||||
struct gendisk *disk = nbd->disk;
|
||||
struct request_queue *q;
|
||||
|
||||
if (disk) {
|
||||
q = disk->queue;
|
||||
del_gendisk(disk);
|
||||
blk_cleanup_queue(disk->queue);
|
||||
blk_cleanup_queue(q);
|
||||
blk_mq_free_tag_set(&nbd->tag_set);
|
||||
disk->private_data = NULL;
|
||||
put_disk(disk);
|
||||
|
@ -213,7 +216,15 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
|
|||
}
|
||||
if (!nsock->dead) {
|
||||
kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
|
||||
atomic_dec(&nbd->config->live_connections);
|
||||
if (atomic_dec_return(&nbd->config->live_connections) == 0) {
|
||||
if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
|
||||
&nbd->config->runtime_flags)) {
|
||||
set_bit(NBD_DISCONNECTED,
|
||||
&nbd->config->runtime_flags);
|
||||
dev_info(nbd_to_dev(nbd),
|
||||
"Disconnected due to user request.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
nsock->dead = true;
|
||||
nsock->pending = NULL;
|
||||
|
@ -231,9 +242,22 @@ static void nbd_size_clear(struct nbd_device *nbd)
|
|||
static void nbd_size_update(struct nbd_device *nbd)
|
||||
{
|
||||
struct nbd_config *config = nbd->config;
|
||||
struct block_device *bdev = bdget_disk(nbd->disk, 0);
|
||||
|
||||
if (config->flags & NBD_FLAG_SEND_TRIM) {
|
||||
nbd->disk->queue->limits.discard_granularity = config->blksize;
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
|
||||
}
|
||||
blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
|
||||
blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
|
||||
set_capacity(nbd->disk, config->bytesize >> 9);
|
||||
if (bdev) {
|
||||
if (bdev->bd_disk)
|
||||
bd_set_size(bdev, config->bytesize);
|
||||
else
|
||||
bdev->bd_invalidated = 1;
|
||||
bdput(bdev);
|
||||
}
|
||||
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
|
||||
}
|
||||
|
||||
|
@ -243,6 +267,8 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
|
|||
struct nbd_config *config = nbd->config;
|
||||
config->blksize = blocksize;
|
||||
config->bytesize = blocksize * nr_blocks;
|
||||
if (nbd->task_recv != NULL)
|
||||
nbd_size_update(nbd);
|
||||
}
|
||||
|
||||
static void nbd_complete_rq(struct request *req)
|
||||
|
@ -286,13 +312,15 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
|
|||
|
||||
if (!refcount_inc_not_zero(&nbd->config_refs)) {
|
||||
cmd->status = BLK_STS_TIMEOUT;
|
||||
return BLK_EH_HANDLED;
|
||||
goto done;
|
||||
}
|
||||
config = nbd->config;
|
||||
|
||||
if (config->num_connections > 1) {
|
||||
dev_err_ratelimited(nbd_to_dev(nbd),
|
||||
"Connection timed out, retrying\n");
|
||||
"Connection timed out, retrying (%d/%d alive)\n",
|
||||
atomic_read(&config->live_connections),
|
||||
config->num_connections);
|
||||
/*
|
||||
* Hooray we have more connections, requeue this IO, the submit
|
||||
* path will put it on a real connection.
|
||||
|
@ -314,7 +342,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
|
|||
}
|
||||
blk_mq_requeue_request(req, true);
|
||||
nbd_config_put(nbd);
|
||||
return BLK_EH_NOT_HANDLED;
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
} else {
|
||||
dev_err_ratelimited(nbd_to_dev(nbd),
|
||||
|
@ -324,8 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
|
|||
cmd->status = BLK_STS_IOERR;
|
||||
sock_shutdown(nbd);
|
||||
nbd_config_put(nbd);
|
||||
|
||||
return BLK_EH_HANDLED;
|
||||
done:
|
||||
blk_mq_complete_request(req);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -647,11 +676,8 @@ static void recv_work(struct work_struct *work)
|
|||
|
||||
static void nbd_clear_req(struct request *req, void *data, bool reserved)
|
||||
{
|
||||
struct nbd_cmd *cmd;
|
||||
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
|
||||
|
||||
if (!blk_mq_request_started(req))
|
||||
return;
|
||||
cmd = blk_mq_rq_to_pdu(req);
|
||||
cmd->status = BLK_STS_IOERR;
|
||||
blk_mq_complete_request(req);
|
||||
}
|
||||
|
@ -714,10 +740,9 @@ static int wait_for_reconnect(struct nbd_device *nbd)
|
|||
return 0;
|
||||
if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
|
||||
return 0;
|
||||
wait_event_timeout(config->conn_wait,
|
||||
atomic_read(&config->live_connections),
|
||||
config->dead_conn_timeout);
|
||||
return atomic_read(&config->live_connections);
|
||||
return wait_event_timeout(config->conn_wait,
|
||||
atomic_read(&config->live_connections) > 0,
|
||||
config->dead_conn_timeout) > 0;
|
||||
}
|
||||
|
||||
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
|
||||
|
@ -950,10 +975,6 @@ static void nbd_bdev_reset(struct block_device *bdev)
|
|||
if (bdev->bd_openers > 1)
|
||||
return;
|
||||
bd_set_size(bdev, 0);
|
||||
if (max_part > 0) {
|
||||
blkdev_reread_part(bdev);
|
||||
bdev->bd_invalidated = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void nbd_parse_flags(struct nbd_device *nbd)
|
||||
|
@ -1040,6 +1061,8 @@ static void nbd_config_put(struct nbd_device *nbd)
|
|||
nbd->config = NULL;
|
||||
|
||||
nbd->tag_set.timeout = 0;
|
||||
nbd->disk->queue->limits.discard_granularity = 0;
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
|
||||
|
||||
mutex_unlock(&nbd->config_lock);
|
||||
|
@ -1109,7 +1132,6 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
bd_set_size(bdev, config->bytesize);
|
||||
if (max_part)
|
||||
bdev->bd_invalidated = 1;
|
||||
mutex_unlock(&nbd->config_lock);
|
||||
|
@ -1118,7 +1140,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
|
|||
if (ret)
|
||||
sock_shutdown(nbd);
|
||||
mutex_lock(&nbd->config_lock);
|
||||
bd_set_size(bdev, 0);
|
||||
nbd_bdev_reset(bdev);
|
||||
/* user requested, ignore socket errors */
|
||||
if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
|
||||
ret = 0;
|
||||
|
@ -1269,6 +1291,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
|
|||
refcount_set(&nbd->config_refs, 1);
|
||||
refcount_inc(&nbd->refs);
|
||||
mutex_unlock(&nbd->config_lock);
|
||||
bdev->bd_invalidated = 1;
|
||||
} else if (nbd_disconnected(nbd->config)) {
|
||||
bdev->bd_invalidated = 1;
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&nbd_index_mutex);
|
||||
|
@ -1490,8 +1515,8 @@ static int nbd_dev_add(int index)
|
|||
*/
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
|
||||
disk->queue->limits.discard_granularity = 512;
|
||||
blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
|
||||
disk->queue->limits.discard_granularity = 0;
|
||||
blk_queue_max_discard_sectors(disk->queue, 0);
|
||||
blk_queue_max_segment_size(disk->queue, UINT_MAX);
|
||||
blk_queue_max_segments(disk->queue, USHRT_MAX);
|
||||
blk_queue_max_hw_sectors(disk->queue, 65536);
|
||||
|
@ -1755,6 +1780,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
|
|||
}
|
||||
mutex_lock(&nbd->config_lock);
|
||||
nbd_disconnect(nbd);
|
||||
nbd_clear_sock(nbd);
|
||||
mutex_unlock(&nbd->config_lock);
|
||||
if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
|
||||
&nbd->config->runtime_flags))
|
||||
|
@ -2093,7 +2119,8 @@ static int __init nbd_init(void)
|
|||
if (nbds_max > 1UL << (MINORBITS - part_shift))
|
||||
return -EINVAL;
|
||||
recv_workqueue = alloc_workqueue("knbd-recv",
|
||||
WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
|
||||
WQ_MEM_RECLAIM | WQ_HIGHPRI |
|
||||
WQ_UNBOUND, 0);
|
||||
if (!recv_workqueue)
|
||||
return -ENOMEM;
|
||||
|
||||
|
|
|
@ -157,23 +157,23 @@ enum {
|
|||
};
|
||||
|
||||
static int g_no_sched;
|
||||
module_param_named(no_sched, g_no_sched, int, S_IRUGO);
|
||||
module_param_named(no_sched, g_no_sched, int, 0444);
|
||||
MODULE_PARM_DESC(no_sched, "No io scheduler");
|
||||
|
||||
static int g_submit_queues = 1;
|
||||
module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
|
||||
module_param_named(submit_queues, g_submit_queues, int, 0444);
|
||||
MODULE_PARM_DESC(submit_queues, "Number of submission queues");
|
||||
|
||||
static int g_home_node = NUMA_NO_NODE;
|
||||
module_param_named(home_node, g_home_node, int, S_IRUGO);
|
||||
module_param_named(home_node, g_home_node, int, 0444);
|
||||
MODULE_PARM_DESC(home_node, "Home node for the device");
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
static char g_timeout_str[80];
|
||||
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
|
||||
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
|
||||
|
||||
static char g_requeue_str[80];
|
||||
module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
|
||||
module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
|
||||
#endif
|
||||
|
||||
static int g_queue_mode = NULL_Q_MQ;
|
||||
|
@ -203,27 +203,27 @@ static const struct kernel_param_ops null_queue_mode_param_ops = {
|
|||
.get = param_get_int,
|
||||
};
|
||||
|
||||
device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
|
||||
device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
|
||||
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
|
||||
|
||||
static int g_gb = 250;
|
||||
module_param_named(gb, g_gb, int, S_IRUGO);
|
||||
module_param_named(gb, g_gb, int, 0444);
|
||||
MODULE_PARM_DESC(gb, "Size in GB");
|
||||
|
||||
static int g_bs = 512;
|
||||
module_param_named(bs, g_bs, int, S_IRUGO);
|
||||
module_param_named(bs, g_bs, int, 0444);
|
||||
MODULE_PARM_DESC(bs, "Block size (in bytes)");
|
||||
|
||||
static int nr_devices = 1;
|
||||
module_param(nr_devices, int, S_IRUGO);
|
||||
module_param(nr_devices, int, 0444);
|
||||
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
|
||||
|
||||
static bool g_blocking;
|
||||
module_param_named(blocking, g_blocking, bool, S_IRUGO);
|
||||
module_param_named(blocking, g_blocking, bool, 0444);
|
||||
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
|
||||
|
||||
static bool shared_tags;
|
||||
module_param(shared_tags, bool, S_IRUGO);
|
||||
module_param(shared_tags, bool, 0444);
|
||||
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
|
||||
|
||||
static int g_irqmode = NULL_IRQ_SOFTIRQ;
|
||||
|
@ -239,19 +239,19 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
|
|||
.get = param_get_int,
|
||||
};
|
||||
|
||||
device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
|
||||
device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
|
||||
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
|
||||
|
||||
static unsigned long g_completion_nsec = 10000;
|
||||
module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
|
||||
module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
|
||||
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
|
||||
|
||||
static int g_hw_queue_depth = 64;
|
||||
module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
|
||||
module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
|
||||
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
|
||||
|
||||
static bool g_use_per_node_hctx;
|
||||
module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
|
||||
module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
|
||||
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
|
||||
|
||||
static struct nullb_device *null_alloc_dev(void);
|
||||
|
@ -1365,7 +1365,8 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
|
|||
static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
|
||||
{
|
||||
pr_info("null: rq %p timed out\n", rq);
|
||||
return BLK_EH_HANDLED;
|
||||
blk_mq_complete_request(rq);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
static int null_rq_prep_fn(struct request_queue *q, struct request *req)
|
||||
|
@ -1427,7 +1428,8 @@ static void null_request_fn(struct request_queue *q)
|
|||
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
|
||||
{
|
||||
pr_info("null: rq %p timed out\n", rq);
|
||||
return BLK_EH_HANDLED;
|
||||
blk_mq_complete_request(rq);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
|
|
|
@ -740,7 +740,7 @@ static int pd_special_command(struct pd_unit *disk,
|
|||
{
|
||||
struct request *rq;
|
||||
|
||||
rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
|
||||
rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
|
||||
if (IS_ERR(rq))
|
||||
return PTR_ERR(rq);
|
||||
|
||||
|
|
|
@ -97,8 +97,8 @@ static int pktdev_major;
|
|||
static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
|
||||
static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
|
||||
static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
|
||||
static mempool_t *psd_pool;
|
||||
static struct bio_set *pkt_bio_set;
|
||||
static mempool_t psd_pool;
|
||||
static struct bio_set pkt_bio_set;
|
||||
|
||||
static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
|
||||
static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
|
||||
|
@ -478,8 +478,8 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
|
|||
if (!pd->dfs_d_root)
|
||||
return;
|
||||
|
||||
pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
|
||||
pd->dfs_d_root, pd, &debug_fops);
|
||||
pd->dfs_f_info = debugfs_create_file("info", 0444,
|
||||
pd->dfs_d_root, pd, &debug_fops);
|
||||
}
|
||||
|
||||
static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
|
||||
|
@ -631,7 +631,7 @@ static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
|
|||
static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
|
||||
{
|
||||
rb_erase(&node->rb_node, &pd->bio_queue);
|
||||
mempool_free(node, pd->rb_pool);
|
||||
mempool_free(node, &pd->rb_pool);
|
||||
pd->bio_queue_size--;
|
||||
BUG_ON(pd->bio_queue_size < 0);
|
||||
}
|
||||
|
@ -704,13 +704,13 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
|
|||
int ret = 0;
|
||||
|
||||
rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
|
||||
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
|
||||
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
|
||||
if (IS_ERR(rq))
|
||||
return PTR_ERR(rq);
|
||||
|
||||
if (cgc->buflen) {
|
||||
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
|
||||
__GFP_RECLAIM);
|
||||
GFP_NOIO);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
@ -1285,7 +1285,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
|
|||
* Fill-in bvec with data from orig_bios.
|
||||
*/
|
||||
spin_lock(&pkt->lock);
|
||||
bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
|
||||
bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
|
||||
|
||||
pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
|
||||
spin_unlock(&pkt->lock);
|
||||
|
@ -2303,14 +2303,14 @@ static void pkt_end_io_read_cloned(struct bio *bio)
|
|||
psd->bio->bi_status = bio->bi_status;
|
||||
bio_put(bio);
|
||||
bio_endio(psd->bio);
|
||||
mempool_free(psd, psd_pool);
|
||||
mempool_free(psd, &psd_pool);
|
||||
pkt_bio_finished(pd);
|
||||
}
|
||||
|
||||
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
|
||||
{
|
||||
struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
|
||||
struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
|
||||
struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
|
||||
struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
|
||||
|
||||
psd->pd = pd;
|
||||
psd->bio = bio;
|
||||
|
@ -2381,7 +2381,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
|
|||
/*
|
||||
* No matching packet found. Store the bio in the work queue.
|
||||
*/
|
||||
node = mempool_alloc(pd->rb_pool, GFP_NOIO);
|
||||
node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
|
||||
node->bio = bio;
|
||||
spin_lock(&pd->lock);
|
||||
BUG_ON(pd->bio_queue_size < 0);
|
||||
|
@ -2451,7 +2451,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
|
|||
|
||||
split = bio_split(bio, last_zone -
|
||||
bio->bi_iter.bi_sector,
|
||||
GFP_NOIO, pkt_bio_set);
|
||||
GFP_NOIO, &pkt_bio_set);
|
||||
bio_chain(split, bio);
|
||||
} else {
|
||||
split = bio;
|
||||
|
@ -2707,9 +2707,9 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
|||
if (!pd)
|
||||
goto out_mutex;
|
||||
|
||||
pd->rb_pool = mempool_create_kmalloc_pool(PKT_RB_POOL_SIZE,
|
||||
sizeof(struct pkt_rb_node));
|
||||
if (!pd->rb_pool)
|
||||
ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
|
||||
sizeof(struct pkt_rb_node));
|
||||
if (ret)
|
||||
goto out_mem;
|
||||
|
||||
INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
|
||||
|
@ -2766,7 +2766,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
|||
out_mem2:
|
||||
put_disk(disk);
|
||||
out_mem:
|
||||
mempool_destroy(pd->rb_pool);
|
||||
mempool_exit(&pd->rb_pool);
|
||||
kfree(pd);
|
||||
out_mutex:
|
||||
mutex_unlock(&ctl_mutex);
|
||||
|
@ -2817,7 +2817,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
|
|||
blk_cleanup_queue(pd->disk->queue);
|
||||
put_disk(pd->disk);
|
||||
|
||||
mempool_destroy(pd->rb_pool);
|
||||
mempool_exit(&pd->rb_pool);
|
||||
kfree(pd);
|
||||
|
||||
/* This is safe: open() is still holding a reference. */
|
||||
|
@ -2914,14 +2914,14 @@ static int __init pkt_init(void)
|
|||
|
||||
mutex_init(&ctl_mutex);
|
||||
|
||||
psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE,
|
||||
sizeof(struct packet_stacked_data));
|
||||
if (!psd_pool)
|
||||
return -ENOMEM;
|
||||
pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
|
||||
if (!pkt_bio_set) {
|
||||
mempool_destroy(psd_pool);
|
||||
return -ENOMEM;
|
||||
ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
|
||||
sizeof(struct packet_stacked_data));
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
|
||||
if (ret) {
|
||||
mempool_exit(&psd_pool);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = register_blkdev(pktdev_major, DRIVER_NAME);
|
||||
|
@ -2954,8 +2954,8 @@ out_misc:
|
|||
out:
|
||||
unregister_blkdev(pktdev_major, DRIVER_NAME);
|
||||
out2:
|
||||
mempool_destroy(psd_pool);
|
||||
bioset_free(pkt_bio_set);
|
||||
mempool_exit(&psd_pool);
|
||||
bioset_exit(&pkt_bio_set);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2968,8 +2968,8 @@ static void __exit pkt_exit(void)
|
|||
pkt_sysfs_cleanup();
|
||||
|
||||
unregister_blkdev(pktdev_major, DRIVER_NAME);
|
||||
mempool_destroy(psd_pool);
|
||||
bioset_free(pkt_bio_set);
|
||||
mempool_exit(&psd_pool);
|
||||
bioset_exit(&pkt_bio_set);
|
||||
}
|
||||
|
||||
MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
|
||||
|
|
|
@ -465,8 +465,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
|||
priv->queue = queue;
|
||||
queue->queuedata = dev;
|
||||
|
||||
blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
|
||||
|
||||
blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
|
||||
blk_queue_segment_boundary(queue, -1UL);
|
||||
blk_queue_dma_alignment(queue, dev->blk_size-1);
|
||||
|
|
|
@@ -424,7 +424,7 @@ static struct workqueue_struct *rbd_wq;
  * single-major requires >= 0.75 version of userspace rbd utility.
  */
 static bool single_major = true;
-module_param(single_major, bool, S_IRUGO);
+module_param(single_major, bool, 0444);
 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
 
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,

@@ -468,11 +468,11 @@ static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
 	return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
 }
 
-static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
-static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
-static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
-static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
-static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
+static BUS_ATTR(add, 0200, NULL, rbd_add);
+static BUS_ATTR(remove, 0200, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, 0200, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, 0200, NULL, rbd_remove_single_major);
+static BUS_ATTR(supported_features, 0444, rbd_supported_features_show, NULL);
 
 static struct attribute *rbd_bus_attrs[] = {
 	&bus_attr_add.attr,

@@ -4204,22 +4204,22 @@ static ssize_t rbd_image_refresh(struct device *dev,
 	return size;
 }
 
-static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
-static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
-static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
-static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
-static DEVICE_ATTR(client_addr, S_IRUGO, rbd_client_addr_show, NULL);
-static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
-static DEVICE_ATTR(cluster_fsid, S_IRUGO, rbd_cluster_fsid_show, NULL);
-static DEVICE_ATTR(config_info, S_IRUSR, rbd_config_info_show, NULL);
-static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
-static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
-static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
-static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
-static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
-static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
-static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
-static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
+static DEVICE_ATTR(size, 0444, rbd_size_show, NULL);
+static DEVICE_ATTR(features, 0444, rbd_features_show, NULL);
+static DEVICE_ATTR(major, 0444, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL);
+static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL);
+static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL);
+static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
+static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
+static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
+static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
+static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL);
+static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL);
+static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL);
 
 static struct attribute *rbd_attrs[] = {
 	&dev_attr_size.attr,
@@ -247,19 +247,19 @@ static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
 	if (IS_ERR_OR_NULL(card->debugfs_dir))
 		goto failed_debugfs_dir;
 
-	debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+	debugfs_stats = debugfs_create_file("stats", 0444,
 					    card->debugfs_dir, card,
 					    &debugfs_stats_fops);
 	if (IS_ERR_OR_NULL(debugfs_stats))
 		goto failed_debugfs_stats;
 
-	debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+	debugfs_pci_regs = debugfs_create_file("pci_regs", 0444,
 					       card->debugfs_dir, card,
 					       &debugfs_pci_regs_fops);
 	if (IS_ERR_OR_NULL(debugfs_pci_regs))
 		goto failed_debugfs_pci_regs;
 
-	debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+	debugfs_cram = debugfs_create_file("cram", 0644,
 					   card->debugfs_dir, card,
 					   &debugfs_cram_fops);
 	if (IS_ERR_OR_NULL(debugfs_cram))
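The rbd and rsxx hunks above (and the virtio-blk and xen ones that follow) belong to the symbolic-to-octal permission conversion mentioned in the pull request. The mapping is purely mechanical; a compile-time check such as the following (an illustrative sketch, not part of the patch) captures the equivalences being relied on:

#include <linux/stat.h>
#include <linux/build_bug.h>

/* The symbolic permission macros expand to exactly these octal values. */
static inline void permission_mapping_check(void)
{
	BUILD_BUG_ON(S_IRUGO != 0444);		/* S_IRUSR | S_IRGRP | S_IROTH */
	BUILD_BUG_ON(S_IRUSR != 0400);
	BUILD_BUG_ON(S_IWUSR != 0200);
	BUILD_BUG_ON((S_IRUGO | S_IWUSR) != 0644);
}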
@@ -567,7 +567,7 @@ static struct carm_request *carm_get_special(struct carm_host *host)
 	if (!crq)
 		return NULL;
 
-	rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL);
+	rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(rq)) {
 		spin_lock_irqsave(&host->lock, flags);
 		carm_put_request(host, crq);
@@ -298,7 +298,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 	struct request *req;
 	int err;
 
-	req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);
+	req = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 

@@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev,
 	return err;
 }
 
-static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL);
 
 /* The queue's logical block size must be set before calling this */
 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)

@@ -576,10 +576,10 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
 }
 
 static const struct device_attribute dev_attr_cache_type_ro =
-	__ATTR(cache_type, S_IRUGO,
+	__ATTR(cache_type, 0444,
 	       virtblk_cache_type_show, NULL);
 static const struct device_attribute dev_attr_cache_type_rw =
-	__ATTR(cache_type, S_IRUGO|S_IWUSR,
+	__ATTR(cache_type, 0644,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
 static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
@@ -98,7 +98,7 @@ MODULE_PARM_DESC(max_queues,
  * backend, 4KB page granularity is used.
  */
 unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 /*
  * The LRU mechanism to clean the lists of persistent grants needs to

@@ -367,7 +367,7 @@ int __init xen_blkif_interface_init(void)
 	out:						\
 		return sprintf(buf, format, result);	\
 	}						\
-	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 VBD_SHOW_ALLRING(oo_req, "%llu\n");
 VBD_SHOW_ALLRING(rd_req, "%llu\n");

@@ -403,7 +403,7 @@ static const struct attribute_group xen_vbdstat_group = {
 							\
 		return sprintf(buf, format, ##args);	\
 	}						\
-	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
 VBD_SHOW(mode, "%s\n", be->mode);
@@ -129,13 +129,12 @@ static const struct block_device_operations xlvbd_block_fops;
  */
 
 static unsigned int xen_blkif_max_segments = 32;
-module_param_named(max_indirect_segments, xen_blkif_max_segments, uint,
-		   S_IRUGO);
+module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444);
 MODULE_PARM_DESC(max_indirect_segments,
 		 "Maximum amount of segments in indirect requests (default is 32)");
 
 static unsigned int xen_blkif_max_queues = 4;
-module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+module_param_named(max_queues, xen_blkif_max_queues, uint, 0444);
 MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
 
 /*

@@ -143,7 +142,7 @@ MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per v
  * backend, 4KB page granularity is used.
  */
 static unsigned int xen_blkif_max_ring_order;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
 #define BLK_RING_SIZE(info)	\
@@ -2192,7 +2192,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 
 		len = nr * CD_FRAMESIZE_RAW;
 
-		rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+		rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
 		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
 			break;
@@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	struct request *rq;
 	int error;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->special = (char *)pc;
 

@@ -437,7 +437,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 	bool delay = false;
 
 	rq = blk_get_request(drive->queue,
-		write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
+		write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
 	ide_req(rq)->type = ATA_PRIV_PC;
 	rq->rq_flags |= rq_flags;

@@ -304,7 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 	struct request *rq;
 	int ret;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->rq_flags = RQF_QUIET;
 	blk_execute_rq(drive->queue, cd->disk, rq, 0);

@@ -166,7 +166,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
 	if (!(setting->flags & DS_SYNC))
 		return setting->set(drive, arg);
 
-	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 5;
 	scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;

@@ -478,7 +478,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
 		return -EBUSY;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	drive->mult_req = arg;

@@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
 	if (NULL == (void *) arg) {
 		struct request *rq;
 
-		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 		ide_req(rq)->type = ATA_PRIV_TASKFILE;
 		blk_execute_rq(drive->queue, NULL, rq, 0);
 		err = scsi_req(rq)->result ? -EIO : 0;

@@ -222,7 +222,7 @@ static int generic_drive_reset(ide_drive_t *drive)
 	struct request *rq;
 	int ret = 0;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 1;
 	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;

@@ -32,7 +32,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	}
 	spin_unlock_irq(&hwif->lock);
 
-	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
 	scsi_req(rq)->cmd_len = 1;
 	ide_req(rq)->type = ATA_PRIV_MISC;

@@ -47,7 +47,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	 * Make sure that *some* command is sent to the drive after the
 	 * timeout has expired, so power management will be reenabled.
 	 */
-	rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(rq))
 		goto out;
 

@@ -19,7 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;

@@ -90,8 +90,7 @@ int generic_ide_resume(struct device *dev)
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request_flags(drive->queue, REQ_OP_DRV_IN,
-				   BLK_MQ_REQ_PREEMPT);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
 	ide_req(rq)->type = ATA_PRIV_PM_RESUME;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;

@@ -854,7 +854,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
 	BUG_ON(size < 0 || size % tape->blk_size);
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;

@@ -862,7 +862,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 
 	if (size) {
 		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
-				      __GFP_RECLAIM);
+				      GFP_NOIO);
 		if (ret)
 			goto out_put;
 	}

@@ -431,7 +431,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 
 	rq = blk_get_request(drive->queue,
 			     (cmd->tf_flags & IDE_TFLAG_WRITE) ?
-				REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
+				REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	/*

@@ -442,7 +442,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	 */
 	if (nsect) {
 		error = blk_rq_map_kern(drive->queue, rq, buf,
-					nsect * SECTOR_SIZE, __GFP_RECLAIM);
+					nsect * SECTOR_SIZE, GFP_NOIO);
 		if (error)
 			goto put_req;
 	}
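The cdrom and IDE hunks above all follow from the same interface change: the last argument of blk_get_request() is now a blk_mq_req_flags_t rather than a gfp_t (sleeping allocations pass 0, non-blocking callers pass BLK_MQ_REQ_NOWAIT, the resume path keeps BLK_MQ_REQ_PREEMPT), while blk_rq_map_kern() still takes a gfp_t such as GFP_NOIO. A rough sketch of the resulting calling convention (the helper below is hypothetical):

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

static struct request *example_get_request(struct request_queue *q, bool nowait)
{
	/* 0 means the allocation may sleep; this is what replaces the old
	 * GFP_KERNEL / __GFP_RECLAIM callers seen in the hunks above.
	 * Non-blocking callers now say so explicitly with BLK_MQ_REQ_NOWAIT
	 * instead of GFP_NOWAIT.
	 */
	return blk_get_request(q, REQ_OP_DRV_IN,
			       nowait ? BLK_MQ_REQ_NOWAIT : 0);
}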
@@ -431,7 +431,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	return 0;
 err_sysfs:
 	if (tt->exit)
-		tt->exit(targetdata);
+		tt->exit(targetdata, true);
 err_init:
 	blk_cleanup_queue(tqueue);
 	tdisk->queue = NULL;

@@ -446,7 +446,7 @@ err_reserve:
 	return ret;
 }
 
-static void __nvm_remove_target(struct nvm_target *t)
+static void __nvm_remove_target(struct nvm_target *t, bool graceful)
 {
 	struct nvm_tgt_type *tt = t->type;
 	struct gendisk *tdisk = t->disk;

@@ -459,7 +459,7 @@ static void __nvm_remove_target(struct nvm_target *t)
 		tt->sysfs_exit(tdisk);
 
 	if (tt->exit)
-		tt->exit(tdisk->private_data);
+		tt->exit(tdisk->private_data, graceful);
 
 	nvm_remove_tgt_dev(t->dev, 1);
 	put_disk(tdisk);

@@ -489,7 +489,7 @@ static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
 		mutex_unlock(&dev->mlock);
 		return 1;
 	}
-	__nvm_remove_target(t);
+	__nvm_remove_target(t, true);
 	mutex_unlock(&dev->mlock);
 
 	return 0;

@@ -963,7 +963,7 @@ void nvm_unregister(struct nvm_dev *dev)
 	list_for_each_entry_safe(t, tmp, &dev->targets, list) {
 		if (t->dev->parent != dev)
 			continue;
-		__nvm_remove_target(t);
+		__nvm_remove_target(t, false);
 	}
 	mutex_unlock(&dev->mlock);
 
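The core.c hunks above give the lightnvm target ->exit() hook a "graceful" flag. A sketch of how a target might use it, modeled loosely on the pblk_gc_exit() change later in this diff (the example target structure and workqueue below are hypothetical):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_tgt {
	struct workqueue_struct *wq;
};

/* Matches the new two-argument exit callback: void (*)(void *, bool). */
static void example_tgt_exit(void *private, bool graceful)
{
	struct example_tgt *tgt = private;

	/* Only wait for queued work when the teardown is graceful (normal
	 * target removal); skip the flush when the device is going away.
	 */
	if (graceful)
		flush_workqueue(tgt->wq);
	destroy_workqueue(tgt->wq);

	kfree(tgt);
}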
@@ -44,13 +44,15 @@ retry:
 		goto out;
 	}
 
-	if (unlikely(!bio_has_data(bio)))
-		goto out;
-
 	pblk_ppa_set_empty(&w_ctx.ppa);
 	w_ctx.flags = flags;
-	if (bio->bi_opf & REQ_PREFLUSH)
+	if (bio->bi_opf & REQ_PREFLUSH) {
 		w_ctx.flags |= PBLK_FLUSH_ENTRY;
+		pblk_write_kick(pblk);
+	}
+
+	if (unlikely(!bio_has_data(bio)))
+		goto out;
 
 	for (i = 0; i < nr_entries; i++) {
 		void *data = bio_data(bio);
@ -40,7 +40,7 @@ static void pblk_line_mark_bb(struct work_struct *work)
|
|||
}
|
||||
|
||||
kfree(ppa);
|
||||
mempool_free(line_ws, pblk->gen_ws_pool);
|
||||
mempool_free(line_ws, &pblk->gen_ws_pool);
|
||||
}
|
||||
|
||||
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
|
||||
|
@ -102,7 +102,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
|
|||
struct pblk *pblk = rqd->private;
|
||||
|
||||
__pblk_end_io_erase(pblk, rqd);
|
||||
mempool_free(rqd, pblk->e_rq_pool);
|
||||
mempool_free(rqd, &pblk->e_rq_pool);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -237,15 +237,15 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
|
|||
switch (type) {
|
||||
case PBLK_WRITE:
|
||||
case PBLK_WRITE_INT:
|
||||
pool = pblk->w_rq_pool;
|
||||
pool = &pblk->w_rq_pool;
|
||||
rq_size = pblk_w_rq_size;
|
||||
break;
|
||||
case PBLK_READ:
|
||||
pool = pblk->r_rq_pool;
|
||||
pool = &pblk->r_rq_pool;
|
||||
rq_size = pblk_g_rq_size;
|
||||
break;
|
||||
default:
|
||||
pool = pblk->e_rq_pool;
|
||||
pool = &pblk->e_rq_pool;
|
||||
rq_size = pblk_g_rq_size;
|
||||
}
|
||||
|
||||
|
@ -265,20 +265,22 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
|
|||
case PBLK_WRITE:
|
||||
kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
|
||||
case PBLK_WRITE_INT:
|
||||
pool = pblk->w_rq_pool;
|
||||
pool = &pblk->w_rq_pool;
|
||||
break;
|
||||
case PBLK_READ:
|
||||
pool = pblk->r_rq_pool;
|
||||
pool = &pblk->r_rq_pool;
|
||||
break;
|
||||
case PBLK_ERASE:
|
||||
pool = pblk->e_rq_pool;
|
||||
pool = &pblk->e_rq_pool;
|
||||
break;
|
||||
default:
|
||||
pr_err("pblk: trying to free unknown rqd type\n");
|
||||
return;
|
||||
}
|
||||
|
||||
nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
|
||||
if (rqd->meta_list)
|
||||
nvm_dev_dma_free(dev->parent, rqd->meta_list,
|
||||
rqd->dma_meta_list);
|
||||
mempool_free(rqd, pool);
|
||||
}
|
||||
|
||||
|
@ -292,7 +294,7 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
|
|||
|
||||
for (i = off; i < nr_pages + off; i++) {
|
||||
bv = bio->bi_io_vec[i];
|
||||
mempool_free(bv.bv_page, pblk->page_bio_pool);
|
||||
mempool_free(bv.bv_page, &pblk->page_bio_pool);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -304,23 +306,23 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
|
|||
int i, ret;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
page = mempool_alloc(pblk->page_bio_pool, flags);
|
||||
page = mempool_alloc(&pblk->page_bio_pool, flags);
|
||||
|
||||
ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
|
||||
if (ret != PBLK_EXPOSED_PAGE_SIZE) {
|
||||
pr_err("pblk: could not add page to bio\n");
|
||||
mempool_free(page, pblk->page_bio_pool);
|
||||
mempool_free(page, &pblk->page_bio_pool);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
pblk_bio_free_pages(pblk, bio, 0, i - 1);
|
||||
pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void pblk_write_kick(struct pblk *pblk)
|
||||
void pblk_write_kick(struct pblk *pblk)
|
||||
{
|
||||
wake_up_process(pblk->writer_ts);
|
||||
mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
|
||||
|
@ -342,13 +344,6 @@ void pblk_write_should_kick(struct pblk *pblk)
|
|||
pblk_write_kick(pblk);
|
||||
}
|
||||
|
||||
void pblk_end_io_sync(struct nvm_rq *rqd)
|
||||
{
|
||||
struct completion *waiting = rqd->private;
|
||||
|
||||
complete(waiting);
|
||||
}
|
||||
|
||||
static void pblk_wait_for_meta(struct pblk *pblk)
|
||||
{
|
||||
do {
|
||||
|
@ -380,7 +375,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
|
|||
|
||||
lockdep_assert_held(&line->lock);
|
||||
|
||||
if (!vsc) {
|
||||
if (line->w_err_gc->has_write_err) {
|
||||
if (line->gc_group != PBLK_LINEGC_WERR) {
|
||||
line->gc_group = PBLK_LINEGC_WERR;
|
||||
move_list = &l_mg->gc_werr_list;
|
||||
pblk_rl_werr_line_in(&pblk->rl);
|
||||
}
|
||||
} else if (!vsc) {
|
||||
if (line->gc_group != PBLK_LINEGC_FULL) {
|
||||
line->gc_group = PBLK_LINEGC_FULL;
|
||||
move_list = &l_mg->gc_full_list;
|
||||
|
@ -467,16 +468,13 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
|
|||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
int ret;
|
||||
|
||||
ret = pblk_check_io(pblk, rqd);
|
||||
if (ret)
|
||||
return ret;
|
||||
#endif
|
||||
|
||||
atomic_inc(&pblk->inflight_io);
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
if (pblk_check_io(pblk, rqd))
|
||||
return NVM_IO_ERR;
|
||||
#endif
|
||||
|
||||
return nvm_submit_io(dev, rqd);
|
||||
}
|
||||
|
||||
|
@ -484,16 +482,13 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
|
|||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
int ret;
|
||||
|
||||
ret = pblk_check_io(pblk, rqd);
|
||||
if (ret)
|
||||
return ret;
|
||||
#endif
|
||||
|
||||
atomic_inc(&pblk->inflight_io);
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
if (pblk_check_io(pblk, rqd))
|
||||
return NVM_IO_ERR;
|
||||
#endif
|
||||
|
||||
return nvm_submit_io_sync(dev, rqd);
|
||||
}
|
||||
|
||||
|
@ -856,9 +851,10 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
|
|||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
if (rqd.error) {
|
||||
if (dir == PBLK_WRITE)
|
||||
if (dir == PBLK_WRITE) {
|
||||
pblk_log_write_err(pblk, &rqd);
|
||||
else if (dir == PBLK_READ)
|
||||
ret = 1;
|
||||
} else if (dir == PBLK_READ)
|
||||
pblk_log_read_err(pblk, &rqd);
|
||||
}
|
||||
|
||||
|
@ -1071,6 +1067,25 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
|
||||
line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
|
||||
if (!line->map_bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
/* will be initialized using bb info from map_bitmap */
|
||||
line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
|
||||
if (!line->invalid_bitmap) {
|
||||
kfree(line->map_bitmap);
|
||||
line->map_bitmap = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* For now lines are always assumed full lines. Thus, smeta former and current
|
||||
* lun bitmaps are omitted.
|
||||
*/
|
||||
|
@ -1108,7 +1123,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
|
|||
|
||||
if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) {
|
||||
pr_debug("pblk: line smeta I/O failed. Retry\n");
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
|
||||
|
@ -1174,19 +1189,9 @@ static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
|
|||
static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int blk_in_line = atomic_read(&line->blk_in_line);
|
||||
int blk_to_erase;
|
||||
|
||||
line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
|
||||
if (!line->map_bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
/* will be initialized using bb info from map_bitmap */
|
||||
line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC);
|
||||
if (!line->invalid_bitmap) {
|
||||
kfree(line->map_bitmap);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Bad blocks do not need to be erased */
|
||||
bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
|
||||
|
||||
|
@ -1199,16 +1204,19 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
|
|||
blk_to_erase = pblk_prepare_new_line(pblk, line);
|
||||
line->state = PBLK_LINESTATE_FREE;
|
||||
} else {
|
||||
blk_to_erase = atomic_read(&line->blk_in_line);
|
||||
blk_to_erase = blk_in_line;
|
||||
}
|
||||
|
||||
if (blk_in_line < lm->min_blk_line) {
|
||||
spin_unlock(&line->lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (line->state != PBLK_LINESTATE_FREE) {
|
||||
kfree(line->map_bitmap);
|
||||
kfree(line->invalid_bitmap);
|
||||
spin_unlock(&line->lock);
|
||||
WARN(1, "pblk: corrupted line %d, state %d\n",
|
||||
line->id, line->state);
|
||||
return -EAGAIN;
|
||||
spin_unlock(&line->lock);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
line->state = PBLK_LINESTATE_OPEN;
|
||||
|
@ -1241,13 +1249,16 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
|
|||
}
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
pblk_rl_free_lines_dec(&pblk->rl, line, true);
|
||||
ret = pblk_line_alloc_bitmaps(pblk, line);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!pblk_line_init_bb(pblk, line, 0)) {
|
||||
list_add(&line->list, &l_mg->free_list);
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
pblk_rl_free_lines_dec(&pblk->rl, line, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1259,6 +1270,24 @@ void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
|
|||
line->emeta = NULL;
|
||||
}
|
||||
|
||||
static void pblk_line_reinit(struct pblk_line *line)
|
||||
{
|
||||
*line->vsc = cpu_to_le32(EMPTY_ENTRY);
|
||||
|
||||
line->map_bitmap = NULL;
|
||||
line->invalid_bitmap = NULL;
|
||||
line->smeta = NULL;
|
||||
line->emeta = NULL;
|
||||
}
|
||||
|
||||
void pblk_line_free(struct pblk_line *line)
|
||||
{
|
||||
kfree(line->map_bitmap);
|
||||
kfree(line->invalid_bitmap);
|
||||
|
||||
pblk_line_reinit(line);
|
||||
}
|
||||
|
||||
struct pblk_line *pblk_line_get(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
|
@ -1292,10 +1321,14 @@ retry:
|
|||
|
||||
ret = pblk_line_prepare(pblk, line);
|
||||
if (ret) {
|
||||
if (ret == -EAGAIN) {
|
||||
switch (ret) {
|
||||
case -EAGAIN:
|
||||
list_add(&line->list, &l_mg->bad_list);
|
||||
goto retry;
|
||||
case -EINTR:
|
||||
list_add(&line->list, &l_mg->corrupt_list);
|
||||
goto retry;
|
||||
} else {
|
||||
default:
|
||||
pr_err("pblk: failed to prepare line %d\n", line->id);
|
||||
list_add(&line->list, &l_mg->free_list);
|
||||
l_mg->nr_free_lines++;
|
||||
|
@ -1321,11 +1354,14 @@ retry:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
retry_line->map_bitmap = line->map_bitmap;
|
||||
retry_line->invalid_bitmap = line->invalid_bitmap;
|
||||
retry_line->smeta = line->smeta;
|
||||
retry_line->emeta = line->emeta;
|
||||
retry_line->meta_line = line->meta_line;
|
||||
|
||||
pblk_line_free(pblk, line);
|
||||
pblk_line_reinit(line);
|
||||
|
||||
l_mg->data_line = retry_line;
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
|
@ -1378,6 +1414,9 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
|
|||
}
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
if (pblk_line_alloc_bitmaps(pblk, line))
|
||||
return NULL;
|
||||
|
||||
if (pblk_line_erase(pblk, line)) {
|
||||
line = pblk_line_retry(pblk, line);
|
||||
if (!line)
|
||||
|
@ -1449,7 +1488,7 @@ static void pblk_line_close_meta_sync(struct pblk *pblk)
|
|||
flush_workqueue(pblk->close_wq);
|
||||
}
|
||||
|
||||
void pblk_pipeline_stop(struct pblk *pblk)
|
||||
void __pblk_pipeline_flush(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
int ret;
|
||||
|
@ -1474,6 +1513,11 @@ void pblk_pipeline_stop(struct pblk *pblk)
|
|||
|
||||
flush_workqueue(pblk->bb_wq);
|
||||
pblk_line_close_meta_sync(pblk);
|
||||
}
|
||||
|
||||
void __pblk_pipeline_stop(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
pblk->state = PBLK_STATE_STOPPED;
|
||||
|
@ -1482,6 +1526,12 @@ void pblk_pipeline_stop(struct pblk *pblk)
|
|||
spin_unlock(&l_mg->free_lock);
|
||||
}
|
||||
|
||||
void pblk_pipeline_stop(struct pblk *pblk)
|
||||
{
|
||||
__pblk_pipeline_flush(pblk);
|
||||
__pblk_pipeline_stop(pblk);
|
||||
}
|
||||
|
||||
struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
|
@ -1511,6 +1561,9 @@ retry_erase:
|
|||
goto retry_erase;
|
||||
}
|
||||
|
||||
if (pblk_line_alloc_bitmaps(pblk, new))
|
||||
return NULL;
|
||||
|
||||
retry_setup:
|
||||
if (!pblk_line_init_metadata(pblk, new, cur)) {
|
||||
new = pblk_line_retry(pblk, new);
|
||||
|
@ -1550,19 +1603,6 @@ out:
|
|||
return new;
|
||||
}
|
||||
|
||||
void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
kfree(line->map_bitmap);
|
||||
kfree(line->invalid_bitmap);
|
||||
|
||||
*line->vsc = cpu_to_le32(EMPTY_ENTRY);
|
||||
|
||||
line->map_bitmap = NULL;
|
||||
line->invalid_bitmap = NULL;
|
||||
line->smeta = NULL;
|
||||
line->emeta = NULL;
|
||||
}
|
||||
|
||||
static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
|
@ -1572,9 +1612,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
|
|||
WARN_ON(line->state != PBLK_LINESTATE_GC);
|
||||
line->state = PBLK_LINESTATE_FREE;
|
||||
line->gc_group = PBLK_LINEGC_NONE;
|
||||
pblk_line_free(pblk, line);
|
||||
spin_unlock(&line->lock);
|
||||
pblk_line_free(line);
|
||||
|
||||
if (line->w_err_gc->has_write_err) {
|
||||
pblk_rl_werr_line_out(&pblk->rl);
|
||||
line->w_err_gc->has_write_err = 0;
|
||||
}
|
||||
|
||||
spin_unlock(&line->lock);
|
||||
atomic_dec(&gc->pipeline_gc);
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
|
@ -1593,7 +1638,7 @@ static void pblk_line_put_ws(struct work_struct *work)
|
|||
struct pblk_line *line = line_put_ws->line;
|
||||
|
||||
__pblk_line_put(pblk, line);
|
||||
mempool_free(line_put_ws, pblk->gen_ws_pool);
|
||||
mempool_free(line_put_ws, &pblk->gen_ws_pool);
|
||||
}
|
||||
|
||||
void pblk_line_put(struct kref *ref)
|
||||
|
@ -1610,7 +1655,7 @@ void pblk_line_put_wq(struct kref *ref)
|
|||
struct pblk *pblk = line->pblk;
|
||||
struct pblk_line_ws *line_put_ws;
|
||||
|
||||
line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC);
|
||||
line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC);
|
||||
if (!line_put_ws)
|
||||
return;
|
||||
|
||||
|
@ -1737,11 +1782,34 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
|
|||
|
||||
spin_lock(&l_mg->close_lock);
|
||||
spin_lock(&line->lock);
|
||||
|
||||
/* Update the in-memory start address for emeta, in case it has
|
||||
* shifted due to write errors
|
||||
*/
|
||||
if (line->emeta_ssec != line->cur_sec)
|
||||
line->emeta_ssec = line->cur_sec;
|
||||
|
||||
list_add_tail(&line->list, &l_mg->emeta_list);
|
||||
spin_unlock(&line->lock);
|
||||
spin_unlock(&l_mg->close_lock);
|
||||
|
||||
pblk_line_should_sync_meta(pblk);
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
unsigned int lba_list_size = lm->emeta_len[2];
|
||||
struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
|
||||
struct pblk_emeta *emeta = line->emeta;
|
||||
|
||||
w_err_gc->lba_list = pblk_malloc(lba_list_size,
|
||||
l_mg->emeta_alloc_type, GFP_KERNEL);
|
||||
memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
|
||||
lba_list_size);
|
||||
}
|
||||
|
||||
void pblk_line_close_ws(struct work_struct *work)
|
||||
|
@ -1750,9 +1818,16 @@ void pblk_line_close_ws(struct work_struct *work)
|
|||
ws);
|
||||
struct pblk *pblk = line_ws->pblk;
|
||||
struct pblk_line *line = line_ws->line;
|
||||
struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
|
||||
|
||||
/* Write errors makes the emeta start address stored in smeta invalid,
|
||||
* so keep a copy of the lba list until we've gc'd the line
|
||||
*/
|
||||
if (w_err_gc->has_write_err)
|
||||
pblk_save_lba_list(pblk, line);
|
||||
|
||||
pblk_line_close(pblk, line);
|
||||
mempool_free(line_ws, pblk->gen_ws_pool);
|
||||
mempool_free(line_ws, &pblk->gen_ws_pool);
|
||||
}
|
||||
|
||||
void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
|
||||
|
@ -1761,7 +1836,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
|
|||
{
|
||||
struct pblk_line_ws *line_ws;
|
||||
|
||||
line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask);
|
||||
line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask);
|
||||
|
||||
line_ws->pblk = pblk;
|
||||
line_ws->line = line;
|
||||
|
|
|
@ -129,6 +129,53 @@ out:
|
|||
kfree(gc_rq_ws);
|
||||
}
|
||||
|
||||
static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
|
||||
struct pblk_line *line)
|
||||
{
|
||||
struct line_emeta *emeta_buf;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
unsigned int lba_list_size = lm->emeta_len[2];
|
||||
__le64 *lba_list;
|
||||
int ret;
|
||||
|
||||
emeta_buf = pblk_malloc(lm->emeta_len[0],
|
||||
l_mg->emeta_alloc_type, GFP_KERNEL);
|
||||
if (!emeta_buf)
|
||||
return NULL;
|
||||
|
||||
ret = pblk_line_read_emeta(pblk, line, emeta_buf);
|
||||
if (ret) {
|
||||
pr_err("pblk: line %d read emeta failed (%d)\n",
|
||||
line->id, ret);
|
||||
pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If this read fails, it means that emeta is corrupted.
|
||||
* For now, leave the line untouched.
|
||||
* TODO: Implement a recovery routine that scans and moves
|
||||
* all sectors on the line.
|
||||
*/
|
||||
|
||||
ret = pblk_recov_check_emeta(pblk, emeta_buf);
|
||||
if (ret) {
|
||||
pr_err("pblk: inconsistent emeta (line %d)\n",
|
||||
line->id);
|
||||
pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lba_list = pblk_malloc(lba_list_size,
|
||||
l_mg->emeta_alloc_type, GFP_KERNEL);
|
||||
if (lba_list)
|
||||
memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
|
||||
|
||||
pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
|
||||
|
||||
return lba_list;
|
||||
}
|
||||
|
||||
static void pblk_gc_line_prepare_ws(struct work_struct *work)
|
||||
{
|
||||
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
|
||||
|
@ -138,46 +185,26 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
|
|||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
struct line_emeta *emeta_buf;
|
||||
struct pblk_line_ws *gc_rq_ws;
|
||||
struct pblk_gc_rq *gc_rq;
|
||||
__le64 *lba_list;
|
||||
unsigned long *invalid_bitmap;
|
||||
int sec_left, nr_secs, bit;
|
||||
int ret;
|
||||
|
||||
invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
|
||||
if (!invalid_bitmap)
|
||||
goto fail_free_ws;
|
||||
|
||||
emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
|
||||
GFP_KERNEL);
|
||||
if (!emeta_buf) {
|
||||
pr_err("pblk: cannot use GC emeta\n");
|
||||
goto fail_free_bitmap;
|
||||
}
|
||||
|
||||
ret = pblk_line_read_emeta(pblk, line, emeta_buf);
|
||||
if (ret) {
|
||||
pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
|
||||
goto fail_free_emeta;
|
||||
}
|
||||
|
||||
/* If this read fails, it means that emeta is corrupted. For now, leave
|
||||
* the line untouched. TODO: Implement a recovery routine that scans and
|
||||
* moves all sectors on the line.
|
||||
*/
|
||||
|
||||
ret = pblk_recov_check_emeta(pblk, emeta_buf);
|
||||
if (ret) {
|
||||
pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
|
||||
goto fail_free_emeta;
|
||||
}
|
||||
|
||||
lba_list = emeta_to_lbas(pblk, emeta_buf);
|
||||
if (!lba_list) {
|
||||
pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
|
||||
goto fail_free_emeta;
|
||||
if (line->w_err_gc->has_write_err) {
|
||||
lba_list = line->w_err_gc->lba_list;
|
||||
line->w_err_gc->lba_list = NULL;
|
||||
} else {
|
||||
lba_list = get_lba_list_from_emeta(pblk, line);
|
||||
if (!lba_list) {
|
||||
pr_err("pblk: could not interpret emeta (line %d)\n",
|
||||
line->id);
|
||||
goto fail_free_ws;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&line->lock);
|
||||
|
@ -187,14 +214,14 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
|
|||
|
||||
if (sec_left < 0) {
|
||||
pr_err("pblk: corrupted GC line (%d)\n", line->id);
|
||||
goto fail_free_emeta;
|
||||
goto fail_free_lba_list;
|
||||
}
|
||||
|
||||
bit = -1;
|
||||
next_rq:
|
||||
gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
|
||||
if (!gc_rq)
|
||||
goto fail_free_emeta;
|
||||
goto fail_free_lba_list;
|
||||
|
||||
nr_secs = 0;
|
||||
do {
|
||||
|
@ -240,7 +267,7 @@ next_rq:
|
|||
goto next_rq;
|
||||
|
||||
out:
|
||||
pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
|
||||
pblk_mfree(lba_list, l_mg->emeta_alloc_type);
|
||||
kfree(line_ws);
|
||||
kfree(invalid_bitmap);
|
||||
|
||||
|
@ -251,9 +278,8 @@ out:
|
|||
|
||||
fail_free_gc_rq:
|
||||
kfree(gc_rq);
|
||||
fail_free_emeta:
|
||||
pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
|
||||
fail_free_bitmap:
|
||||
fail_free_lba_list:
|
||||
pblk_mfree(lba_list, l_mg->emeta_alloc_type);
|
||||
kfree(invalid_bitmap);
|
||||
fail_free_ws:
|
||||
kfree(line_ws);
|
||||
|
@ -349,12 +375,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
|
|||
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
|
||||
{
|
||||
unsigned int nr_blocks_free, nr_blocks_need;
|
||||
unsigned int werr_lines = atomic_read(&rl->werr_lines);
|
||||
|
||||
nr_blocks_need = pblk_rl_high_thrs(rl);
|
||||
nr_blocks_free = pblk_rl_nr_free_blks(rl);
|
||||
|
||||
/* This is not critical, no need to take lock here */
|
||||
return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
|
||||
return ((werr_lines > 0) ||
|
||||
((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
|
||||
}
|
||||
|
||||
void pblk_gc_free_full_lines(struct pblk *pblk)
|
||||
|
@ -649,7 +677,7 @@ fail_free_main_kthread:
|
|||
return ret;
|
||||
}
|
||||
|
||||
void pblk_gc_exit(struct pblk *pblk)
|
||||
void pblk_gc_exit(struct pblk *pblk, bool graceful)
|
||||
{
|
||||
struct pblk_gc *gc = &pblk->gc;
|
||||
|
||||
|
@ -663,10 +691,12 @@ void pblk_gc_exit(struct pblk *pblk)
|
|||
if (gc->gc_reader_ts)
|
||||
kthread_stop(gc->gc_reader_ts);
|
||||
|
||||
flush_workqueue(gc->gc_reader_wq);
|
||||
destroy_workqueue(gc->gc_reader_wq);
|
||||
if (graceful) {
|
||||
flush_workqueue(gc->gc_reader_wq);
|
||||
flush_workqueue(gc->gc_line_reader_wq);
|
||||
}
|
||||
|
||||
flush_workqueue(gc->gc_line_reader_wq);
|
||||
destroy_workqueue(gc->gc_reader_wq);
|
||||
destroy_workqueue(gc->gc_line_reader_wq);
|
||||
|
||||
if (gc->gc_writer_ts)
|
||||
|
|
|
@ -20,10 +20,15 @@
|
|||
|
||||
#include "pblk.h"
|
||||
|
||||
unsigned int write_buffer_size;
|
||||
|
||||
module_param(write_buffer_size, uint, 0644);
|
||||
MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
|
||||
|
||||
static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
|
||||
*pblk_w_rq_cache;
|
||||
static DECLARE_RWSEM(pblk_lock);
|
||||
struct bio_set *pblk_bio_set;
|
||||
struct bio_set pblk_bio_set;
|
||||
|
||||
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
|
||||
struct bio *bio)
|
||||
|
@ -127,10 +132,8 @@ static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
|
|||
if (!line) {
|
||||
/* Configure next line for user data */
|
||||
line = pblk_line_get_first_data(pblk);
|
||||
if (!line) {
|
||||
pr_err("pblk: line list corrupted\n");
|
||||
if (!line)
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -141,6 +144,7 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
|
|||
sector_t i;
|
||||
struct ppa_addr ppa;
|
||||
size_t map_size;
|
||||
int ret = 0;
|
||||
|
||||
map_size = pblk_trans_map_size(pblk);
|
||||
pblk->trans_map = vmalloc(map_size);
|
||||
|
@ -152,7 +156,11 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
|
|||
for (i = 0; i < pblk->rl.nr_secs; i++)
|
||||
pblk_trans_map_set(pblk, i, ppa);
|
||||
|
||||
return pblk_l2p_recover(pblk, factory_init);
|
||||
ret = pblk_l2p_recover(pblk, factory_init);
|
||||
if (ret)
|
||||
vfree(pblk->trans_map);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void pblk_rwb_free(struct pblk *pblk)
|
||||
|
@ -169,10 +177,15 @@ static int pblk_rwb_init(struct pblk *pblk)
|
|||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_rb_entry *entries;
|
||||
unsigned long nr_entries;
|
||||
unsigned long nr_entries, buffer_size;
|
||||
unsigned int power_size, power_seg_sz;
|
||||
|
||||
nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
|
||||
if (write_buffer_size && (write_buffer_size > pblk->pgs_in_buffer))
|
||||
buffer_size = write_buffer_size;
|
||||
else
|
||||
buffer_size = pblk->pgs_in_buffer;
|
||||
|
||||
nr_entries = pblk_rb_calculate_size(buffer_size);
|
||||
|
||||
entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
|
||||
if (!entries)
|
||||
|
@ -341,7 +354,7 @@ static int pblk_core_init(struct pblk *pblk)
|
|||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int max_write_ppas;
|
||||
int ret, max_write_ppas;
|
||||
|
||||
atomic64_set(&pblk->user_wa, 0);
|
||||
atomic64_set(&pblk->pad_wa, 0);
|
||||
|
@ -375,33 +388,33 @@ static int pblk_core_init(struct pblk *pblk)
|
|||
goto fail_free_pad_dist;
|
||||
|
||||
/* Internal bios can be at most the sectors signaled by the device. */
|
||||
pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
|
||||
if (!pblk->page_bio_pool)
|
||||
ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0);
|
||||
if (ret)
|
||||
goto free_global_caches;
|
||||
|
||||
pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
|
||||
pblk_ws_cache);
|
||||
if (!pblk->gen_ws_pool)
|
||||
ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
|
||||
pblk_ws_cache);
|
||||
if (ret)
|
||||
goto free_page_bio_pool;
|
||||
|
||||
pblk->rec_pool = mempool_create_slab_pool(geo->all_luns,
|
||||
pblk_rec_cache);
|
||||
if (!pblk->rec_pool)
|
||||
ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
|
||||
pblk_rec_cache);
|
||||
if (ret)
|
||||
goto free_gen_ws_pool;
|
||||
|
||||
pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns,
|
||||
pblk_g_rq_cache);
|
||||
if (!pblk->r_rq_pool)
|
||||
ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
|
||||
pblk_g_rq_cache);
|
||||
if (ret)
|
||||
goto free_rec_pool;
|
||||
|
||||
pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns,
|
||||
pblk_g_rq_cache);
|
||||
if (!pblk->e_rq_pool)
|
||||
ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
|
||||
pblk_g_rq_cache);
|
||||
if (ret)
|
||||
goto free_r_rq_pool;
|
||||
|
||||
pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns,
|
||||
pblk_w_rq_cache);
|
||||
if (!pblk->w_rq_pool)
|
||||
ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
|
||||
pblk_w_rq_cache);
|
||||
if (ret)
|
||||
goto free_e_rq_pool;
|
||||
|
||||
pblk->close_wq = alloc_workqueue("pblk-close-wq",
|
||||
|
@ -423,6 +436,7 @@ static int pblk_core_init(struct pblk *pblk)
|
|||
goto free_r_end_wq;
|
||||
|
||||
INIT_LIST_HEAD(&pblk->compl_list);
|
||||
INIT_LIST_HEAD(&pblk->resubmit_list);
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -433,17 +447,17 @@ free_bb_wq:
|
|||
free_close_wq:
|
||||
destroy_workqueue(pblk->close_wq);
|
||||
free_w_rq_pool:
|
||||
mempool_destroy(pblk->w_rq_pool);
|
||||
mempool_exit(&pblk->w_rq_pool);
|
||||
free_e_rq_pool:
|
||||
mempool_destroy(pblk->e_rq_pool);
|
||||
mempool_exit(&pblk->e_rq_pool);
|
||||
free_r_rq_pool:
|
||||
mempool_destroy(pblk->r_rq_pool);
|
||||
mempool_exit(&pblk->r_rq_pool);
|
||||
free_rec_pool:
|
||||
mempool_destroy(pblk->rec_pool);
|
||||
mempool_exit(&pblk->rec_pool);
|
||||
free_gen_ws_pool:
|
||||
mempool_destroy(pblk->gen_ws_pool);
|
||||
mempool_exit(&pblk->gen_ws_pool);
|
||||
free_page_bio_pool:
|
||||
mempool_destroy(pblk->page_bio_pool);
|
||||
mempool_exit(&pblk->page_bio_pool);
|
||||
free_global_caches:
|
||||
pblk_free_global_caches(pblk);
|
||||
fail_free_pad_dist:
|
||||
|
@ -462,12 +476,12 @@ static void pblk_core_free(struct pblk *pblk)
|
|||
if (pblk->bb_wq)
|
||||
destroy_workqueue(pblk->bb_wq);
|
||||
|
||||
mempool_destroy(pblk->page_bio_pool);
|
||||
mempool_destroy(pblk->gen_ws_pool);
|
||||
mempool_destroy(pblk->rec_pool);
|
||||
mempool_destroy(pblk->r_rq_pool);
|
||||
mempool_destroy(pblk->e_rq_pool);
|
||||
mempool_destroy(pblk->w_rq_pool);
|
||||
mempool_exit(&pblk->page_bio_pool);
|
||||
mempool_exit(&pblk->gen_ws_pool);
|
||||
mempool_exit(&pblk->rec_pool);
|
||||
mempool_exit(&pblk->r_rq_pool);
|
||||
mempool_exit(&pblk->e_rq_pool);
|
||||
mempool_exit(&pblk->w_rq_pool);
|
||||
|
||||
pblk_free_global_caches(pblk);
|
||||
kfree(pblk->pad_dist);
|
||||
|
@ -489,11 +503,17 @@ static void pblk_line_mg_free(struct pblk *pblk)
|
|||
}
|
||||
}
|
||||
|
||||
static void pblk_line_meta_free(struct pblk_line *line)
|
||||
static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
|
||||
struct pblk_line *line)
|
||||
{
|
||||
struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
|
||||
|
||||
kfree(line->blk_bitmap);
|
||||
kfree(line->erase_bitmap);
|
||||
kfree(line->chks);
|
||||
|
||||
pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type);
|
||||
kfree(w_err_gc);
|
||||
}
|
||||
|
||||
static void pblk_lines_free(struct pblk *pblk)
|
||||
|
@ -506,8 +526,8 @@ static void pblk_lines_free(struct pblk *pblk)
|
|||
for (i = 0; i < l_mg->nr_lines; i++) {
|
||||
line = &pblk->lines[i];
|
||||
|
||||
pblk_line_free(pblk, line);
|
||||
pblk_line_meta_free(line);
|
||||
pblk_line_free(line);
|
||||
pblk_line_meta_free(l_mg, line);
|
||||
}
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
|
@ -748,14 +768,14 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
|
|||
chunk->cnlb = chunk_meta->cnlb;
|
||||
chunk->wp = chunk_meta->wp;
|
||||
|
||||
if (!(chunk->state & NVM_CHK_ST_OFFLINE))
|
||||
continue;
|
||||
|
||||
if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
|
||||
WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(chunk->state & NVM_CHK_ST_OFFLINE))
|
||||
continue;
|
||||
|
||||
set_bit(pos, line->blk_bitmap);
|
||||
nr_bad_chks++;
|
||||
}
|
||||
|
@ -809,20 +829,28 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
|
|||
return -ENOMEM;
|
||||
|
||||
line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
|
||||
if (!line->erase_bitmap) {
|
||||
kfree(line->blk_bitmap);
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!line->erase_bitmap)
|
||||
goto free_blk_bitmap;
|
||||
|
||||
|
||||
line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
|
||||
GFP_KERNEL);
|
||||
if (!line->chks) {
|
||||
kfree(line->erase_bitmap);
|
||||
kfree(line->blk_bitmap);
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!line->chks)
|
||||
goto free_erase_bitmap;
|
||||
|
||||
line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
|
||||
if (!line->w_err_gc)
|
||||
goto free_chks;
|
||||
|
||||
return 0;
|
||||
|
||||
free_chks:
|
||||
kfree(line->chks);
|
||||
free_erase_bitmap:
|
||||
kfree(line->erase_bitmap);
|
||||
free_blk_bitmap:
|
||||
kfree(line->blk_bitmap);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int pblk_line_mg_init(struct pblk *pblk)
|
||||
|
@ -847,12 +875,14 @@ static int pblk_line_mg_init(struct pblk *pblk)
|
|||
INIT_LIST_HEAD(&l_mg->gc_mid_list);
|
||||
INIT_LIST_HEAD(&l_mg->gc_low_list);
|
||||
INIT_LIST_HEAD(&l_mg->gc_empty_list);
|
||||
INIT_LIST_HEAD(&l_mg->gc_werr_list);
|
||||
|
||||
INIT_LIST_HEAD(&l_mg->emeta_list);
|
||||
|
||||
l_mg->gc_lists[0] = &l_mg->gc_high_list;
|
||||
l_mg->gc_lists[1] = &l_mg->gc_mid_list;
|
||||
l_mg->gc_lists[2] = &l_mg->gc_low_list;
|
||||
l_mg->gc_lists[0] = &l_mg->gc_werr_list;
|
||||
l_mg->gc_lists[1] = &l_mg->gc_high_list;
|
||||
l_mg->gc_lists[2] = &l_mg->gc_mid_list;
|
||||
l_mg->gc_lists[3] = &l_mg->gc_low_list;
|
||||
|
||||
spin_lock_init(&l_mg->free_lock);
|
||||
spin_lock_init(&l_mg->close_lock);
|
||||
|
@ -1047,6 +1077,11 @@ static int pblk_lines_init(struct pblk *pblk)
|
|||
nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
|
||||
}
|
||||
|
||||
if (!nr_free_chks) {
|
||||
pr_err("pblk: too many bad blocks prevent for sane instance\n");
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
pblk_set_provision(pblk, nr_free_chks);
|
||||
|
||||
kfree(chunk_meta);
|
||||
|
@ -1054,7 +1089,7 @@ static int pblk_lines_init(struct pblk *pblk)
|
|||
|
||||
fail_free_lines:
|
||||
while (--i >= 0)
|
||||
pblk_line_meta_free(&pblk->lines[i]);
|
||||
pblk_line_meta_free(l_mg, &pblk->lines[i]);
|
||||
kfree(pblk->lines);
|
||||
fail_free_chunk_meta:
|
||||
kfree(chunk_meta);
|
||||
|
@ -1110,23 +1145,25 @@ static void pblk_free(struct pblk *pblk)
|
|||
kfree(pblk);
|
||||
}
|
||||
|
||||
static void pblk_tear_down(struct pblk *pblk)
|
||||
static void pblk_tear_down(struct pblk *pblk, bool graceful)
|
||||
{
|
||||
pblk_pipeline_stop(pblk);
|
||||
if (graceful)
|
||||
__pblk_pipeline_flush(pblk);
|
||||
__pblk_pipeline_stop(pblk);
|
||||
pblk_writer_stop(pblk);
|
||||
pblk_rb_sync_l2p(&pblk->rwb);
|
||||
pblk_rl_free(&pblk->rl);
|
||||
|
||||
pr_debug("pblk: consistent tear down\n");
|
||||
pr_debug("pblk: consistent tear down (graceful:%d)\n", graceful);
|
||||
}
|
||||
|
||||
static void pblk_exit(void *private)
|
||||
static void pblk_exit(void *private, bool graceful)
|
||||
{
|
||||
struct pblk *pblk = private;
|
||||
|
||||
down_write(&pblk_lock);
|
||||
pblk_gc_exit(pblk);
|
||||
pblk_tear_down(pblk);
|
||||
pblk_gc_exit(pblk, graceful);
|
||||
pblk_tear_down(pblk, graceful);
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
|
||||
|
@ -1175,6 +1212,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
|
|||
pblk->state = PBLK_STATE_RUNNING;
|
||||
pblk->gc.gc_enabled = 0;
|
||||
|
||||
spin_lock_init(&pblk->resubmit_lock);
|
||||
spin_lock_init(&pblk->trans_lock);
|
||||
spin_lock_init(&pblk->lock);
|
||||
|
||||
|
@ -1297,18 +1335,18 @@ static int __init pblk_module_init(void)
|
|||
{
|
||||
int ret;
|
||||
|
||||
pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
|
||||
if (!pblk_bio_set)
|
||||
return -ENOMEM;
|
||||
ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = nvm_register_tgt_type(&tt_pblk);
|
||||
if (ret)
|
||||
bioset_free(pblk_bio_set);
|
||||
bioset_exit(&pblk_bio_set);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void pblk_module_exit(void)
|
||||
{
|
||||
bioset_free(pblk_bio_set);
|
||||
bioset_exit(&pblk_bio_set);
|
||||
nvm_unregister_tgt_type(&tt_pblk);
|
||||
}
|
||||
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
|
||||
#include "pblk.h"
|
||||
|
||||
static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
|
||||
struct ppa_addr *ppa_list,
|
||||
unsigned long *lun_bitmap,
|
||||
struct pblk_sec_meta *meta_list,
|
||||
unsigned int valid_secs)
|
||||
static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
|
||||
struct ppa_addr *ppa_list,
|
||||
unsigned long *lun_bitmap,
|
||||
struct pblk_sec_meta *meta_list,
|
||||
unsigned int valid_secs)
|
||||
{
|
||||
struct pblk_line *line = pblk_line_get_data(pblk);
|
||||
struct pblk_emeta *emeta;
|
||||
|
@ -35,8 +35,14 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
|
|||
if (pblk_line_is_full(line)) {
|
||||
struct pblk_line *prev_line = line;
|
||||
|
||||
/* If we cannot allocate a new line, make sure to store metadata
|
||||
* on current line and then fail
|
||||
*/
|
||||
line = pblk_line_replace_data(pblk);
|
||||
pblk_line_close_meta(pblk, prev_line);
|
||||
|
||||
if (!line)
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
emeta = line->emeta;
|
||||
|
@ -74,6 +80,7 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
|
|||
}
|
||||
|
||||
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
|
||||
|
@ -87,8 +94,12 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
|
|||
|
||||
for (i = off; i < rqd->nr_ppas; i += min) {
|
||||
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
|
||||
pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
|
||||
lun_bitmap, &meta_list[i], map_secs);
|
||||
if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
|
||||
lun_bitmap, &meta_list[i], map_secs)) {
|
||||
bio_put(rqd->bio);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
|
||||
pblk_pipeline_stop(pblk);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -108,8 +119,12 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
|
||||
for (i = 0; i < rqd->nr_ppas; i += min) {
|
||||
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
|
||||
pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
|
||||
lun_bitmap, &meta_list[i], map_secs);
|
||||
if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
|
||||
lun_bitmap, &meta_list[i], map_secs)) {
|
||||
bio_put(rqd->bio);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
|
||||
pblk_pipeline_stop(pblk);
|
||||
}
|
||||
|
||||
erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
|
||||
|
||||
|
|
|
@ -142,10 +142,9 @@ static void clean_wctx(struct pblk_w_ctx *w_ctx)
|
|||
{
|
||||
int flags;
|
||||
|
||||
try:
|
||||
flags = READ_ONCE(w_ctx->flags);
|
||||
if (!(flags & PBLK_SUBMITTED_ENTRY))
|
||||
goto try;
|
||||
WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
|
||||
"pblk: overwriting unsubmitted data\n");
|
||||
|
||||
/* Release flags on context. Protect from writes and reads */
|
||||
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
|
||||
|
@ -350,7 +349,7 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
|
|||
}
|
||||
|
||||
static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
|
||||
unsigned int pos)
|
||||
unsigned int pos)
|
||||
{
|
||||
struct pblk_rb_entry *entry;
|
||||
unsigned int sync, flush_point;
|
||||
|
@ -420,7 +419,7 @@ void pblk_rb_flush(struct pblk_rb *rb)
|
|||
if (pblk_rb_flush_point_set(rb, NULL, mem))
|
||||
return;
|
||||
|
||||
pblk_write_should_kick(pblk);
|
||||
pblk_write_kick(pblk);
|
||||
}
|
||||
|
||||
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
|
||||
|
@ -503,45 +502,6 @@ int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller of this function must ensure that the backpointer will not
|
||||
* overwrite the entries passed on the list.
|
||||
*/
|
||||
unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
|
||||
struct list_head *list,
|
||||
unsigned int max)
|
||||
{
|
||||
struct pblk_rb_entry *entry, *tentry;
|
||||
struct page *page;
|
||||
unsigned int read = 0;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry_safe(entry, tentry, list, index) {
|
||||
if (read > max) {
|
||||
pr_err("pblk: too many entries on list\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
page = virt_to_page(entry->data);
|
||||
if (!page) {
|
||||
pr_err("pblk: could not allocate write bio page\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = bio_add_page(bio, page, rb->seg_size, 0);
|
||||
if (ret != rb->seg_size) {
|
||||
pr_err("pblk: could not add page to write bio\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
list_del(&entry->index);
|
||||
read++;
|
||||
}
|
||||
|
||||
out:
|
||||
return read;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read available entries on rb and add them to the given bio. To avoid a memory
|
||||
* copy, a page reference to the write buffer is used to be added to the bio.
|
||||
|
|
|
@ -39,10 +39,10 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
|
|||
}
|
||||
|
||||
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
sector_t blba, unsigned long *read_bitmap)
|
||||
struct bio *bio, sector_t blba,
|
||||
unsigned long *read_bitmap)
|
||||
{
|
||||
struct pblk_sec_meta *meta_list = rqd->meta_list;
|
||||
struct bio *bio = rqd->bio;
|
||||
struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
|
||||
int nr_secs = rqd->nr_ppas;
|
||||
bool advanced_bio = false;
|
||||
|
@ -102,34 +102,71 @@ next:
|
|||
#endif
|
||||
}
|
||||
|
||||
static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
|
||||
static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
sector_t blba)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = pblk_submit_io(pblk, rqd);
|
||||
if (err)
|
||||
return NVM_IO_ERR;
|
||||
|
||||
return NVM_IO_OK;
|
||||
}
|
||||
|
||||
static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
sector_t blba)
|
||||
{
|
||||
struct pblk_sec_meta *meta_list = rqd->meta_list;
|
||||
struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
|
||||
int nr_lbas = rqd->nr_ppas;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_lbas; i++) {
|
||||
u64 lba = le64_to_cpu(meta_list[i].lba);
|
||||
u64 lba = le64_to_cpu(meta_lba_list[i].lba);
|
||||
|
||||
if (lba == ADDR_EMPTY)
|
||||
continue;
|
||||
|
||||
WARN(lba != blba + i, "pblk: corrupted read LBA\n");
|
||||
if (lba != blba + i) {
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
struct ppa_addr *p;
|
||||
|
||||
p = (nr_lbas == 1) ? &rqd->ppa_list[i] : &rqd->ppa_addr;
|
||||
print_ppa(&pblk->dev->geo, p, "seq", i);
|
||||
#endif
|
||||
pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
|
||||
lba, (u64)blba + i);
|
||||
WARN_ON(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* There can be holes in the lba list.
|
||||
*/
|
||||
static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
u64 *lba_list, int nr_lbas)
|
||||
{
|
||||
struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
|
||||
int i, j;
|
||||
|
||||
for (i = 0, j = 0; i < nr_lbas; i++) {
|
||||
u64 lba = lba_list[i];
|
||||
u64 meta_lba;
|
||||
|
||||
if (lba == ADDR_EMPTY)
|
||||
continue;
|
||||
|
||||
meta_lba = le64_to_cpu(meta_lba_list[j].lba);
|
||||
|
||||
if (lba != meta_lba) {
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
struct ppa_addr *p;
|
||||
int nr_ppas = rqd->nr_ppas;
|
||||
|
||||
p = (nr_ppas == 1) ? &rqd->ppa_list[j] : &rqd->ppa_addr;
|
||||
print_ppa(&pblk->dev->geo, p, "seq", j);
|
||||
#endif
|
||||
pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
|
||||
lba, meta_lba);
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
j++;
|
||||
}
|
||||
|
||||
WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
|
||||
}
|
||||
|
||||
static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct ppa_addr *ppa_list;
|
||||
|
@@ -152,7 +189,6 @@ static void pblk_end_user_read(struct bio *bio)
|
|||
WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
|
||||
#endif
|
||||
bio_endio(bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
|
@@ -160,23 +196,18 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *bio = rqd->bio;
|
||||
struct bio *int_bio = rqd->bio;
|
||||
unsigned long start_time = r_ctx->start_time;
|
||||
|
||||
generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time);
|
||||
|
||||
if (rqd->error)
|
||||
pblk_log_read_err(pblk, rqd);
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
else
|
||||
WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
|
||||
#endif
|
||||
|
||||
pblk_read_check(pblk, rqd, r_ctx->lba);
|
||||
pblk_read_check_seq(pblk, rqd, r_ctx->lba);
|
||||
|
||||
bio_put(bio);
|
||||
if (r_ctx->private)
|
||||
pblk_end_user_read((struct bio *)r_ctx->private);
|
||||
if (int_bio)
|
||||
bio_put(int_bio);
|
||||
|
||||
if (put_line)
|
||||
pblk_read_put_rqd_kref(pblk, rqd);
|
||||
|
@@ -193,16 +224,19 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
static void pblk_end_io_read(struct nvm_rq *rqd)
|
||||
{
|
||||
struct pblk *pblk = rqd->private;
|
||||
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *bio = (struct bio *)r_ctx->private;
|
||||
|
||||
pblk_end_user_read(bio);
|
||||
__pblk_end_io_read(pblk, rqd, true);
|
||||
}
|
||||
|
||||
static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
unsigned int bio_init_idx,
|
||||
unsigned long *read_bitmap)
|
||||
static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct bio *orig_bio, unsigned int bio_init_idx,
|
||||
unsigned long *read_bitmap)
|
||||
{
|
||||
struct bio *new_bio, *bio = rqd->bio;
|
||||
struct pblk_sec_meta *meta_list = rqd->meta_list;
|
||||
struct bio *new_bio;
|
||||
struct bio_vec src_bv, dst_bv;
|
||||
void *ppa_ptr = NULL;
|
||||
void *src_p, *dst_p;
|
||||
|
@@ -219,11 +253,11 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
new_bio = bio_alloc(GFP_KERNEL, nr_holes);
|
||||
|
||||
if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
|
||||
goto err;
|
||||
goto fail_add_pages;
|
||||
|
||||
if (nr_holes != new_bio->bi_vcnt) {
|
||||
pr_err("pblk: malformed bio\n");
|
||||
goto err;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_secs; i++)
|
||||
|
@@ -246,7 +280,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
if (ret) {
|
||||
bio_put(rqd->bio);
|
||||
pr_err("pblk: sync read IO submission failed\n");
|
||||
goto err;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (rqd->error) {
|
||||
|
@@ -282,7 +316,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
meta_list[hole].lba = lba_list_media[i];
|
||||
|
||||
src_bv = new_bio->bi_io_vec[i++];
|
||||
dst_bv = bio->bi_io_vec[bio_init_idx + hole];
|
||||
dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole];
|
||||
|
||||
src_p = kmap_atomic(src_bv.bv_page);
|
||||
dst_p = kmap_atomic(dst_bv.bv_page);
|
||||
|
@@ -294,35 +328,33 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
kunmap_atomic(src_p);
|
||||
kunmap_atomic(dst_p);
|
||||
|
||||
mempool_free(src_bv.bv_page, pblk->page_bio_pool);
|
||||
mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
|
||||
|
||||
hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
|
||||
} while (hole < nr_secs);
|
||||
|
||||
bio_put(new_bio);
|
||||
|
||||
/* Complete the original bio and associated request */
|
||||
bio_endio(bio);
|
||||
rqd->bio = bio;
|
||||
/* restore original request */
|
||||
rqd->bio = NULL;
|
||||
rqd->nr_ppas = nr_secs;
|
||||
|
||||
__pblk_end_io_read(pblk, rqd, false);
|
||||
return NVM_IO_OK;
|
||||
|
||||
err:
|
||||
pr_err("pblk: failed to perform partial read\n");
|
||||
return NVM_IO_DONE;
|
||||
|
||||
fail:
|
||||
/* Free allocated pages in new bio */
|
||||
pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
|
||||
pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
|
||||
fail_add_pages:
|
||||
pr_err("pblk: failed to perform partial read\n");
|
||||
__pblk_end_io_read(pblk, rqd, false);
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
|
||||
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
|
||||
sector_t lba, unsigned long *read_bitmap)
|
||||
{
|
||||
struct pblk_sec_meta *meta_list = rqd->meta_list;
|
||||
struct bio *bio = rqd->bio;
|
||||
struct ppa_addr ppa;
|
||||
|
||||
pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
|
||||
|
@@ -386,14 +418,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
|
|||
rqd = pblk_alloc_rqd(pblk, PBLK_READ);
|
||||
|
||||
rqd->opcode = NVM_OP_PREAD;
|
||||
rqd->bio = bio;
|
||||
rqd->nr_ppas = nr_secs;
|
||||
rqd->bio = NULL; /* cloned bio if needed */
|
||||
rqd->private = pblk;
|
||||
rqd->end_io = pblk_end_io_read;
|
||||
|
||||
r_ctx = nvm_rq_to_pdu(rqd);
|
||||
r_ctx->start_time = jiffies;
|
||||
r_ctx->lba = blba;
|
||||
r_ctx->private = bio; /* original bio */
|
||||
|
||||
/* Save the index for this bio's start. This is needed in case
|
||||
* we need to fill a partial read.
|
||||
|
@@ -411,17 +444,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
|
|||
rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
|
||||
rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
|
||||
|
||||
pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap);
|
||||
pblk_read_ppalist_rq(pblk, rqd, bio, blba, &read_bitmap);
|
||||
} else {
|
||||
pblk_read_rq(pblk, rqd, blba, &read_bitmap);
|
||||
pblk_read_rq(pblk, rqd, bio, blba, &read_bitmap);
|
||||
}
|
||||
|
||||
bio_get(bio);
|
||||
if (bitmap_full(&read_bitmap, nr_secs)) {
|
||||
bio_endio(bio);
|
||||
atomic_inc(&pblk->inflight_io);
|
||||
__pblk_end_io_read(pblk, rqd, false);
|
||||
return NVM_IO_OK;
|
||||
return NVM_IO_DONE;
|
||||
}
|
||||
|
||||
/* All sectors are to be read from the device */
|
||||
|
@@ -429,20 +460,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
|
|||
struct bio *int_bio = NULL;
|
||||
|
||||
/* Clone read bio to deal with read errors internally */
|
||||
int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
|
||||
int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
|
||||
if (!int_bio) {
|
||||
pr_err("pblk: could not clone read bio\n");
|
||||
goto fail_end_io;
|
||||
}
|
||||
|
||||
rqd->bio = int_bio;
|
||||
r_ctx->private = bio;
|
||||
|
||||
ret = pblk_submit_read_io(pblk, rqd);
|
||||
if (ret) {
|
||||
if (pblk_submit_io(pblk, rqd)) {
|
||||
pr_err("pblk: read IO submission failed\n");
|
||||
if (int_bio)
|
||||
bio_put(int_bio);
|
||||
ret = NVM_IO_ERR;
|
||||
goto fail_end_io;
|
||||
}
|
||||
|
||||
|
@@ -452,7 +480,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
|
|||
/* The read bio request could be partially filled by the write buffer,
|
||||
* but there are some holes that need to be read from the drive.
|
||||
*/
|
||||
return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
|
||||
return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap);
|
||||
|
||||
fail_rqd_free:
|
||||
pblk_free_rqd(pblk, rqd, PBLK_READ);
|
||||
|
@@ -585,6 +613,8 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
|
|||
goto err_free_bio;
|
||||
}
|
||||
|
||||
pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
if (rqd.error) {
|
||||
|
|
|
@@ -16,97 +16,6 @@
|
|||
|
||||
#include "pblk.h"
|
||||
|
||||
void pblk_submit_rec(struct work_struct *work)
|
||||
{
|
||||
struct pblk_rec_ctx *recovery =
|
||||
container_of(work, struct pblk_rec_ctx, ws_rec);
|
||||
struct pblk *pblk = recovery->pblk;
|
||||
struct nvm_rq *rqd = recovery->rqd;
|
||||
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct bio *bio;
|
||||
unsigned int nr_rec_secs;
|
||||
unsigned int pgs_read;
|
||||
int ret;
|
||||
|
||||
nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
|
||||
NVM_MAX_VLBA);
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
|
||||
|
||||
bio->bi_iter.bi_sector = 0;
|
||||
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
|
||||
rqd->bio = bio;
|
||||
rqd->nr_ppas = nr_rec_secs;
|
||||
|
||||
pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
|
||||
nr_rec_secs);
|
||||
if (pgs_read != nr_rec_secs) {
|
||||
pr_err("pblk: could not read recovery entries\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
|
||||
pr_err("pblk: could not setup recovery request\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_long_add(nr_rec_secs, &pblk->recov_writes);
|
||||
#endif
|
||||
|
||||
ret = pblk_submit_io(pblk, rqd);
|
||||
if (ret) {
|
||||
pr_err("pblk: I/O submission failed: %d\n", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
mempool_free(recovery, pblk->rec_pool);
|
||||
return;
|
||||
|
||||
err:
|
||||
bio_put(bio);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
|
||||
}
|
||||
|
||||
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
|
||||
struct pblk_rec_ctx *recovery, u64 *comp_bits,
|
||||
unsigned int comp)
|
||||
{
|
||||
struct nvm_rq *rec_rqd;
|
||||
struct pblk_c_ctx *rec_ctx;
|
||||
int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
|
||||
|
||||
rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
|
||||
rec_ctx = nvm_rq_to_pdu(rec_rqd);
|
||||
|
||||
/* Copy completion bitmap, but exclude the first X completed entries */
|
||||
bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
|
||||
(unsigned long int *)comp_bits,
|
||||
comp, NVM_MAX_VLBA);
|
||||
|
||||
/* Save the context for the entries that need to be re-written and
|
||||
* update current context with the completed entries.
|
||||
*/
|
||||
rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
|
||||
if (comp >= c_ctx->nr_valid) {
|
||||
rec_ctx->nr_valid = 0;
|
||||
rec_ctx->nr_padded = nr_entries - comp;
|
||||
|
||||
c_ctx->nr_padded = comp - c_ctx->nr_valid;
|
||||
} else {
|
||||
rec_ctx->nr_valid = c_ctx->nr_valid - comp;
|
||||
rec_ctx->nr_padded = c_ctx->nr_padded;
|
||||
|
||||
c_ctx->nr_valid = comp;
|
||||
c_ctx->nr_padded = 0;
|
||||
}
|
||||
|
||||
recovery->rqd = rec_rqd;
|
||||
recovery->pblk = pblk;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
|
||||
{
|
||||
u32 crc;
|
||||
|
@@ -865,18 +774,30 @@ static void pblk_recov_wa_counters(struct pblk *pblk,
|
|||
}
|
||||
|
||||
static int pblk_line_was_written(struct pblk_line *line,
|
||||
struct pblk_line_meta *lm)
|
||||
struct pblk *pblk)
|
||||
{
|
||||
|
||||
int i;
|
||||
int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct nvm_chk_meta *chunk;
|
||||
struct ppa_addr bppa;
|
||||
int smeta_blk;
|
||||
|
||||
for (i = 0; i < lm->blk_per_line; i++) {
|
||||
if (!(line->chks[i].state & state_mask))
|
||||
return 1;
|
||||
}
|
||||
if (line->state == PBLK_LINESTATE_BAD)
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
|
||||
if (smeta_blk >= lm->blk_per_line)
|
||||
return 0;
|
||||
|
||||
bppa = pblk->luns[smeta_blk].bppa;
|
||||
chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
|
||||
|
||||
if (chunk->state & NVM_CHK_ST_FREE)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
|
||||
|
@@ -915,7 +836,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
|
|||
line->lun_bitmap = ((void *)(smeta_buf)) +
|
||||
sizeof(struct line_smeta);
|
||||
|
||||
if (!pblk_line_was_written(line, lm))
|
||||
if (!pblk_line_was_written(line, pblk))
|
||||
continue;
|
||||
|
||||
/* Lines that cannot be read are assumed as not written here */
|
||||
|
|
|
@@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
|
|||
pblk_rl_kick_u_timer(rl);
|
||||
}
|
||||
|
||||
void pblk_rl_werr_line_in(struct pblk_rl *rl)
|
||||
{
|
||||
atomic_inc(&rl->werr_lines);
|
||||
}
|
||||
|
||||
void pblk_rl_werr_line_out(struct pblk_rl *rl)
|
||||
{
|
||||
atomic_dec(&rl->werr_lines);
|
||||
}
|
||||
|
||||
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
|
||||
{
|
||||
atomic_add(nr_entries, &rl->rb_gc_cnt);
|
||||
|
@@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
|
|||
{
|
||||
struct pblk *pblk = container_of(rl, struct pblk, rl);
|
||||
int max = rl->rb_budget;
|
||||
int werr_gc_needed = atomic_read(&rl->werr_lines);
|
||||
|
||||
if (free_blocks >= rl->high) {
|
||||
rl->rb_user_max = max;
|
||||
rl->rb_gc_max = 0;
|
||||
rl->rb_state = PBLK_RL_HIGH;
|
||||
if (werr_gc_needed) {
|
||||
/* Allocate a small budget for recovering
|
||||
* lines with write errors
|
||||
*/
|
||||
rl->rb_gc_max = 1 << rl->rb_windows_pw;
|
||||
rl->rb_user_max = max - rl->rb_gc_max;
|
||||
rl->rb_state = PBLK_RL_WERR;
|
||||
} else {
|
||||
rl->rb_user_max = max;
|
||||
rl->rb_gc_max = 0;
|
||||
rl->rb_state = PBLK_RL_OFF;
|
||||
}
|
||||
} else if (free_blocks < rl->high) {
|
||||
int shift = rl->high_pw - rl->rb_windows_pw;
|
||||
int user_windows = free_blocks >> shift;
|
||||
|
@@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
|
|||
rl->rb_state = PBLK_RL_LOW;
|
||||
}
|
||||
|
||||
if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW))
|
||||
if (rl->rb_state != PBLK_RL_OFF)
|
||||
pblk_gc_should_start(pblk);
|
||||
else
|
||||
pblk_gc_should_stop(pblk);
|
||||
|
@@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
|
|||
atomic_set(&rl->rb_user_cnt, 0);
|
||||
atomic_set(&rl->rb_gc_cnt, 0);
|
||||
atomic_set(&rl->rb_space, -1);
|
||||
atomic_set(&rl->werr_lines, 0);
|
||||
|
||||
timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
|
||||
|
||||
|
|
|
@@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
|
|||
int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
|
||||
int d_line_cnt = 0, l_line_cnt = 0;
|
||||
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
|
||||
int gc_werr = 0;
|
||||
|
||||
int bad = 0, cor = 0;
|
||||
int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
|
||||
int map_weight = 0, meta_weight = 0;
|
||||
|
@@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
|
|||
gc_empty++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->gc_werr_list, list) {
|
||||
if (line->type == PBLK_LINETYPE_DATA)
|
||||
d_line_cnt++;
|
||||
else if (line->type == PBLK_LINETYPE_LOG)
|
||||
l_line_cnt++;
|
||||
closed_line_cnt++;
|
||||
gc_werr++;
|
||||
}
|
||||
|
||||
list_for_each_entry(line, &l_mg->bad_list, list)
|
||||
bad++;
|
||||
list_for_each_entry(line, &l_mg->corrupt_list, list)
|
||||
|
@@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
|
|||
l_mg->nr_lines);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
|
||||
gc_full, gc_high, gc_mid, gc_low, gc_empty,
|
||||
"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
|
||||
gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
|
||||
atomic_read(&pblk->gc.read_inflight_gc));
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
|
|
|
@@ -103,68 +103,150 @@ retry:
|
|||
pblk_rb_sync_end(&pblk->rwb, &flags);
|
||||
}
|
||||
|
||||
/* When a write fails, we are not sure whether the block has grown bad or a page
|
||||
* range is more susceptible to write errors. If a high number of pages fail, we
|
||||
* assume that the block is bad and we mark it accordingly. In all cases, we
|
||||
* remap and resubmit the failed entries as fast as possible; if a flush is
|
||||
* waiting on a completion, the whole stack would stall otherwise.
|
||||
*/
|
||||
/* Map remaining sectors in chunk, starting from ppa */
|
||||
static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line *line;
|
||||
struct ppa_addr map_ppa = *ppa;
|
||||
u64 paddr;
|
||||
int done = 0;
|
||||
|
||||
line = &pblk->lines[pblk_ppa_to_line(*ppa)];
|
||||
spin_lock(&line->lock);
|
||||
|
||||
while (!done) {
|
||||
paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
|
||||
|
||||
if (!test_and_set_bit(paddr, line->map_bitmap))
|
||||
line->left_msecs--;
|
||||
|
||||
if (!test_and_set_bit(paddr, line->invalid_bitmap))
|
||||
le32_add_cpu(line->vsc, -1);
|
||||
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
map_ppa.ppa++;
|
||||
if (map_ppa.g.pg == geo->num_pg)
|
||||
done = 1;
|
||||
} else {
|
||||
map_ppa.m.sec++;
|
||||
if (map_ppa.m.sec == geo->clba)
|
||||
done = 1;
|
||||
}
|
||||
}
|
||||
|
||||
line->w_err_gc->has_write_err = 1;
|
||||
spin_unlock(&line->lock);
|
||||
}
|
||||
|
||||
static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
|
||||
unsigned int nr_entries)
|
||||
{
|
||||
struct pblk_rb *rb = &pblk->rwb;
|
||||
struct pblk_rb_entry *entry;
|
||||
struct pblk_line *line;
|
||||
struct pblk_w_ctx *w_ctx;
|
||||
struct ppa_addr ppa_l2p;
|
||||
int flags;
|
||||
unsigned int pos, i;
|
||||
|
||||
spin_lock(&pblk->trans_lock);
|
||||
pos = sentry;
|
||||
for (i = 0; i < nr_entries; i++) {
|
||||
entry = &rb->entries[pos];
|
||||
w_ctx = &entry->w_ctx;
|
||||
|
||||
/* Check if the lba has been overwritten */
|
||||
ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
|
||||
if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
|
||||
w_ctx->lba = ADDR_EMPTY;
|
||||
|
||||
/* Mark up the entry as submittable again */
|
||||
flags = READ_ONCE(w_ctx->flags);
|
||||
flags |= PBLK_WRITTEN_DATA;
|
||||
/* Release flags on write context. Protect from writes */
|
||||
smp_store_release(&w_ctx->flags, flags);
|
||||
|
||||
/* Decrease the reference count to the line as we will
|
||||
* re-map these entries
|
||||
*/
|
||||
line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)];
|
||||
kref_put(&line->ref, pblk_line_put);
|
||||
|
||||
pos = (pos + 1) & (rb->nr_entries - 1);
|
||||
}
|
||||
spin_unlock(&pblk->trans_lock);
|
||||
}
|
||||
|
||||
static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
|
||||
{
|
||||
struct pblk_c_ctx *r_ctx;
|
||||
|
||||
r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL);
|
||||
if (!r_ctx)
|
||||
return;
|
||||
|
||||
r_ctx->lun_bitmap = NULL;
|
||||
r_ctx->sentry = c_ctx->sentry;
|
||||
r_ctx->nr_valid = c_ctx->nr_valid;
|
||||
r_ctx->nr_padded = c_ctx->nr_padded;
|
||||
|
||||
spin_lock(&pblk->resubmit_lock);
|
||||
list_add_tail(&r_ctx->list, &pblk->resubmit_list);
|
||||
spin_unlock(&pblk->resubmit_lock);
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void pblk_submit_rec(struct work_struct *work)
|
||||
{
|
||||
struct pblk_rec_ctx *recovery =
|
||||
container_of(work, struct pblk_rec_ctx, ws_rec);
|
||||
struct pblk *pblk = recovery->pblk;
|
||||
struct nvm_rq *rqd = recovery->rqd;
|
||||
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct ppa_addr *ppa_list;
|
||||
|
||||
pblk_log_write_err(pblk, rqd);
|
||||
|
||||
if (rqd->nr_ppas == 1)
|
||||
ppa_list = &rqd->ppa_addr;
|
||||
else
|
||||
ppa_list = rqd->ppa_list;
|
||||
|
||||
pblk_map_remaining(pblk, ppa_list);
|
||||
pblk_queue_resubmit(pblk, c_ctx);
|
||||
|
||||
pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);
|
||||
if (c_ctx->nr_padded)
|
||||
pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
|
||||
c_ctx->nr_padded);
|
||||
bio_put(rqd->bio);
|
||||
pblk_free_rqd(pblk, rqd, PBLK_WRITE);
|
||||
mempool_free(recovery, &pblk->rec_pool);
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
}
|
||||
|
||||
|
||||
static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
void *comp_bits = &rqd->ppa_status;
|
||||
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
|
||||
struct pblk_rec_ctx *recovery;
|
||||
struct ppa_addr *ppa_list = rqd->ppa_list;
|
||||
int nr_ppas = rqd->nr_ppas;
|
||||
unsigned int c_entries;
|
||||
int bit, ret;
|
||||
|
||||
if (unlikely(nr_ppas == 1))
|
||||
ppa_list = &rqd->ppa_addr;
|
||||
|
||||
recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
|
||||
|
||||
INIT_LIST_HEAD(&recovery->failed);
|
||||
|
||||
bit = -1;
|
||||
while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
|
||||
struct pblk_rb_entry *entry;
|
||||
struct ppa_addr ppa;
|
||||
|
||||
/* Logic error */
|
||||
if (bit > c_ctx->nr_valid) {
|
||||
WARN_ONCE(1, "pblk: corrupted write request\n");
|
||||
mempool_free(recovery, pblk->rec_pool);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ppa = ppa_list[bit];
|
||||
entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
|
||||
if (!entry) {
|
||||
pr_err("pblk: could not scan entry on write failure\n");
|
||||
mempool_free(recovery, pblk->rec_pool);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* The list is filled first and emptied afterwards. No need for
|
||||
* protecting it with a lock
|
||||
*/
|
||||
list_add_tail(&entry->index, &recovery->failed);
|
||||
recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);
|
||||
if (!recovery) {
|
||||
pr_err("pblk: could not allocate recovery work\n");
|
||||
return;
|
||||
}
|
||||
|
||||
c_entries = find_first_bit(comp_bits, nr_ppas);
|
||||
ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
|
||||
if (ret) {
|
||||
pr_err("pblk: could not recover from write failure\n");
|
||||
mempool_free(recovery, pblk->rec_pool);
|
||||
goto out;
|
||||
}
|
||||
recovery->pblk = pblk;
|
||||
recovery->rqd = rqd;
|
||||
|
||||
INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
|
||||
queue_work(pblk->close_wq, &recovery->ws_rec);
|
||||
|
||||
out:
|
||||
pblk_complete_write(pblk, rqd, c_ctx);
|
||||
}
|
||||
|
||||
static void pblk_end_io_write(struct nvm_rq *rqd)
|
||||
|
@@ -173,8 +255,8 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
|
|||
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
|
||||
|
||||
if (rqd->error) {
|
||||
pblk_log_write_err(pblk, rqd);
|
||||
return pblk_end_w_fail(pblk, rqd);
|
||||
pblk_end_w_fail(pblk, rqd);
|
||||
return;
|
||||
}
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
else
|
||||
|
@@ -198,6 +280,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
|
|||
if (rqd->error) {
|
||||
pblk_log_write_err(pblk, rqd);
|
||||
pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
|
||||
line->w_err_gc->has_write_err = 1;
|
||||
}
|
||||
|
||||
sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
|
||||
|
@@ -266,31 +349,6 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct pblk_c_ctx *c_ctx)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
unsigned long *lun_bitmap;
|
||||
int ret;
|
||||
|
||||
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
|
||||
if (!lun_bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
c_ctx->lun_bitmap = lun_bitmap;
|
||||
|
||||
ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
|
||||
|
||||
rqd->ppa_status = (u64)0;
|
||||
rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
|
||||
unsigned int secs_to_flush)
|
||||
{
|
||||
|
@@ -339,6 +397,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
|
|||
bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
|
||||
l_mg->emeta_alloc_type, GFP_KERNEL);
|
||||
if (IS_ERR(bio)) {
|
||||
pr_err("pblk: failed to map emeta io");
|
||||
ret = PTR_ERR(bio);
|
||||
goto fail_free_rqd;
|
||||
}
|
||||
|
@@ -515,27 +574,55 @@ static int pblk_submit_write(struct pblk *pblk)
|
|||
unsigned int secs_avail, secs_to_sync, secs_to_com;
|
||||
unsigned int secs_to_flush;
|
||||
unsigned long pos;
|
||||
unsigned int resubmit;
|
||||
|
||||
/* If there are no sectors in the cache, flushes (bios without data)
|
||||
* will be cleared on the cache threads
|
||||
*/
|
||||
secs_avail = pblk_rb_read_count(&pblk->rwb);
|
||||
if (!secs_avail)
|
||||
return 1;
|
||||
spin_lock(&pblk->resubmit_lock);
|
||||
resubmit = !list_empty(&pblk->resubmit_list);
|
||||
spin_unlock(&pblk->resubmit_lock);
|
||||
|
||||
secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
|
||||
if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
|
||||
return 1;
|
||||
/* Resubmit failed writes first */
|
||||
if (resubmit) {
|
||||
struct pblk_c_ctx *r_ctx;
|
||||
|
||||
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
|
||||
if (secs_to_sync > pblk->max_write_pgs) {
|
||||
pr_err("pblk: bad buffer sync calculation\n");
|
||||
return 1;
|
||||
spin_lock(&pblk->resubmit_lock);
|
||||
r_ctx = list_first_entry(&pblk->resubmit_list,
|
||||
struct pblk_c_ctx, list);
|
||||
list_del(&r_ctx->list);
|
||||
spin_unlock(&pblk->resubmit_lock);
|
||||
|
||||
secs_avail = r_ctx->nr_valid;
|
||||
pos = r_ctx->sentry;
|
||||
|
||||
pblk_prepare_resubmit(pblk, pos, secs_avail);
|
||||
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
|
||||
secs_avail);
|
||||
|
||||
kfree(r_ctx);
|
||||
} else {
|
||||
/* If there are no sectors in the cache,
|
||||
* flushes (bios without data) will be cleared on
|
||||
* the cache threads
|
||||
*/
|
||||
secs_avail = pblk_rb_read_count(&pblk->rwb);
|
||||
if (!secs_avail)
|
||||
return 1;
|
||||
|
||||
secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
|
||||
if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
|
||||
return 1;
|
||||
|
||||
secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
|
||||
secs_to_flush);
|
||||
if (secs_to_sync > pblk->max_write_pgs) {
|
||||
pr_err("pblk: bad buffer sync calculation\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
secs_to_com = (secs_to_sync > secs_avail) ?
|
||||
secs_avail : secs_to_sync;
|
||||
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
|
||||
}
|
||||
|
||||
secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
|
||||
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, secs_to_sync);
|
||||
|
||||
bio->bi_iter.bi_sector = 0; /* internal bio */
|
||||
|
|
|
@@ -89,12 +89,14 @@ struct pblk_sec_meta {
|
|||
/* The number of GC lists and the rate-limiter states go together. This way the
|
||||
* rate-limiter can dictate how much GC is needed based on resource utilization.
|
||||
*/
|
||||
#define PBLK_GC_NR_LISTS 3
|
||||
#define PBLK_GC_NR_LISTS 4
|
||||
|
||||
enum {
|
||||
PBLK_RL_HIGH = 1,
|
||||
PBLK_RL_MID = 2,
|
||||
PBLK_RL_LOW = 3,
|
||||
PBLK_RL_OFF = 0,
|
||||
PBLK_RL_WERR = 1,
|
||||
PBLK_RL_HIGH = 2,
|
||||
PBLK_RL_MID = 3,
|
||||
PBLK_RL_LOW = 4
|
||||
};
|
||||
|
||||
#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
|
||||
|
@@ -128,7 +130,6 @@ struct pblk_pad_rq {
|
|||
struct pblk_rec_ctx {
|
||||
struct pblk *pblk;
|
||||
struct nvm_rq *rqd;
|
||||
struct list_head failed;
|
||||
struct work_struct ws_rec;
|
||||
};
|
||||
|
||||
|
@@ -279,6 +280,8 @@ struct pblk_rl {
|
|||
int rb_user_active;
|
||||
int rb_gc_active;
|
||||
|
||||
atomic_t werr_lines; /* Number of write error lines that needs gc */
|
||||
|
||||
struct timer_list u_timer;
|
||||
|
||||
unsigned long long nr_secs;
|
||||
|
@@ -312,6 +315,7 @@ enum {
|
|||
PBLK_LINEGC_MID = 23,
|
||||
PBLK_LINEGC_HIGH = 24,
|
||||
PBLK_LINEGC_FULL = 25,
|
||||
PBLK_LINEGC_WERR = 26
|
||||
};
|
||||
|
||||
#define PBLK_MAGIC 0x70626c6b /*pblk*/
|
||||
|
@@ -413,6 +417,11 @@ struct pblk_smeta {
|
|||
struct line_smeta *buf; /* smeta buffer in persistent format */
|
||||
};
|
||||
|
||||
struct pblk_w_err_gc {
|
||||
int has_write_err;
|
||||
__le64 *lba_list;
|
||||
};
|
||||
|
||||
struct pblk_line {
|
||||
struct pblk *pblk;
|
||||
unsigned int id; /* Line number corresponds to the
|
||||
|
@@ -458,6 +467,8 @@ struct pblk_line {
|
|||
|
||||
struct kref ref; /* Write buffer L2P references */
|
||||
|
||||
struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */
|
||||
|
||||
spinlock_t lock; /* Necessary for invalid_bitmap only */
|
||||
};
|
||||
|
||||
|
@@ -489,6 +500,8 @@ struct pblk_line_mgmt {
|
|||
struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
|
||||
struct list_head gc_low_list; /* Full lines ready to GC, low isc */
|
||||
|
||||
struct list_head gc_werr_list; /* Write err recovery list */
|
||||
|
||||
struct list_head gc_full_list; /* Full lines ready to GC, no valid */
|
||||
struct list_head gc_empty_list; /* Full lines close, all valid */
|
||||
|
||||
|
@@ -664,12 +677,15 @@ struct pblk {
|
|||
|
||||
struct list_head compl_list;
|
||||
|
||||
mempool_t *page_bio_pool;
|
||||
mempool_t *gen_ws_pool;
|
||||
mempool_t *rec_pool;
|
||||
mempool_t *r_rq_pool;
|
||||
mempool_t *w_rq_pool;
|
||||
mempool_t *e_rq_pool;
|
||||
spinlock_t resubmit_lock; /* Resubmit list lock */
|
||||
struct list_head resubmit_list; /* Resubmit list for failed writes*/
|
||||
|
||||
mempool_t page_bio_pool;
|
||||
mempool_t gen_ws_pool;
|
||||
mempool_t rec_pool;
|
||||
mempool_t r_rq_pool;
|
||||
mempool_t w_rq_pool;
|
||||
mempool_t e_rq_pool;
|
||||
|
||||
struct workqueue_struct *close_wq;
|
||||
struct workqueue_struct *bb_wq;
|
||||
|
@@ -713,9 +729,6 @@ void pblk_rb_sync_l2p(struct pblk_rb *rb);
|
|||
unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
|
||||
unsigned int pos, unsigned int nr_entries,
|
||||
unsigned int count);
|
||||
unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
|
||||
struct list_head *list,
|
||||
unsigned int max);
|
||||
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
|
||||
struct ppa_addr ppa, int bio_iter, bool advanced_bio);
|
||||
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
|
||||
|
@@ -766,11 +779,13 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk);
|
|||
struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
|
||||
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
|
||||
int pblk_line_is_full(struct pblk_line *line);
|
||||
void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
|
||||
void pblk_line_free(struct pblk_line *line);
|
||||
void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
|
||||
void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
|
||||
void pblk_line_close_ws(struct work_struct *work);
|
||||
void pblk_pipeline_stop(struct pblk *pblk);
|
||||
void __pblk_pipeline_stop(struct pblk *pblk);
|
||||
void __pblk_pipeline_flush(struct pblk *pblk);
|
||||
void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
|
||||
void (*work)(struct work_struct *), gfp_t gfp_mask,
|
||||
struct workqueue_struct *wq);
|
||||
|
@@ -794,7 +809,6 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
|
|||
void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
|
||||
void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
|
||||
unsigned long *lun_bitmap);
|
||||
void pblk_end_io_sync(struct nvm_rq *rqd);
|
||||
int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
|
||||
int nr_pages);
|
||||
void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
|
||||
|
@@ -837,23 +851,20 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
|
|||
int pblk_write_ts(void *data);
|
||||
void pblk_write_timer_fn(struct timer_list *t);
|
||||
void pblk_write_should_kick(struct pblk *pblk);
|
||||
void pblk_write_kick(struct pblk *pblk);
|
||||
|
||||
/*
|
||||
* pblk read path
|
||||
*/
|
||||
extern struct bio_set *pblk_bio_set;
|
||||
extern struct bio_set pblk_bio_set;
|
||||
int pblk_submit_read(struct pblk *pblk, struct bio *bio);
|
||||
int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
|
||||
/*
|
||||
* pblk recovery
|
||||
*/
|
||||
void pblk_submit_rec(struct work_struct *work);
|
||||
struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
|
||||
int pblk_recov_pad(struct pblk *pblk);
|
||||
int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
|
||||
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
|
||||
struct pblk_rec_ctx *recovery, u64 *comp_bits,
|
||||
unsigned int comp);
|
||||
|
||||
/*
|
||||
* pblk gc
|
||||
|
@@ -864,7 +875,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
|
|||
#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
|
||||
|
||||
int pblk_gc_init(struct pblk *pblk);
|
||||
void pblk_gc_exit(struct pblk *pblk);
|
||||
void pblk_gc_exit(struct pblk *pblk, bool graceful);
|
||||
void pblk_gc_should_start(struct pblk *pblk);
|
||||
void pblk_gc_should_stop(struct pblk *pblk);
|
||||
void pblk_gc_should_kick(struct pblk *pblk);
|
||||
|
@@ -894,6 +905,9 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
|
|||
bool used);
|
||||
int pblk_rl_is_limit(struct pblk_rl *rl);
|
||||
|
||||
void pblk_rl_werr_line_in(struct pblk_rl *rl);
|
||||
void pblk_rl_werr_line_out(struct pblk_rl *rl);
|
||||
|
||||
/*
|
||||
* pblk sysfs
|
||||
*/
|
||||
|
|
|
@@ -269,7 +269,7 @@ struct bcache_device {
|
|||
atomic_t *stripe_sectors_dirty;
|
||||
unsigned long *full_dirty_stripes;
|
||||
|
||||
struct bio_set *bio_split;
|
||||
struct bio_set bio_split;
|
||||
|
||||
unsigned data_csum:1;
|
||||
|
||||
|
@@ -345,6 +345,7 @@ struct cached_dev {
|
|||
|
||||
struct keybuf writeback_keys;
|
||||
|
||||
struct task_struct *status_update_thread;
|
||||
/*
|
||||
* Order the write-half of writeback operations strongly in dispatch
|
||||
* order. (Maintain LBA order; don't allow reads completing out of
|
||||
|
@@ -392,6 +393,7 @@ struct cached_dev {
|
|||
#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
|
||||
atomic_t io_errors;
|
||||
unsigned error_limit;
|
||||
unsigned offline_seconds;
|
||||
|
||||
char backing_dev_name[BDEVNAME_SIZE];
|
||||
};
|
||||
|
@@ -528,9 +530,9 @@ struct cache_set {
|
|||
struct closure sb_write;
|
||||
struct semaphore sb_write_mutex;
|
||||
|
||||
mempool_t *search;
|
||||
mempool_t *bio_meta;
|
||||
struct bio_set *bio_split;
|
||||
mempool_t search;
|
||||
mempool_t bio_meta;
|
||||
struct bio_set bio_split;
|
||||
|
||||
/* For the btree cache */
|
||||
struct shrinker shrink;
|
||||
|
@@ -655,7 +657,7 @@ struct cache_set {
|
|||
* A btree node on disk could have too many bsets for an iterator to fit
|
||||
* on the stack - have to dynamically allocate them
|
||||
*/
|
||||
mempool_t *fill_iter;
|
||||
mempool_t fill_iter;
|
||||
|
||||
struct bset_sort_state sort;
|
||||
|
||||
|
@@ -956,8 +958,6 @@ void bch_prio_write(struct cache *);
|
|||
void bch_write_bdev_super(struct cached_dev *, struct closure *);
|
||||
|
||||
extern struct workqueue_struct *bcache_wq;
|
||||
extern const char * const bch_cache_modes[];
|
||||
extern const char * const bch_stop_on_failure_modes[];
|
||||
extern struct mutex bch_register_lock;
|
||||
extern struct list_head bch_cache_sets;
|
||||
|
||||
|
|
|
@@ -1118,8 +1118,7 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
|
|||
|
||||
void bch_bset_sort_state_free(struct bset_sort_state *state)
|
||||
{
|
||||
if (state->pool)
|
||||
mempool_destroy(state->pool);
|
||||
mempool_exit(&state->pool);
|
||||
}
|
||||
|
||||
int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
|
||||
|
@@ -1129,11 +1128,7 @@ int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
|
|||
state->page_order = page_order;
|
||||
state->crit_factor = int_sqrt(1 << page_order);
|
||||
|
||||
state->pool = mempool_create_page_pool(1, page_order);
|
||||
if (!state->pool)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
return mempool_init_page_pool(&state->pool, 1, page_order);
|
||||
}
|
||||
EXPORT_SYMBOL(bch_bset_sort_state_init);
|
||||
|
||||
|
@@ -1191,7 +1186,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
|
|||
|
||||
BUG_ON(order > state->page_order);
|
||||
|
||||
outp = mempool_alloc(state->pool, GFP_NOIO);
|
||||
outp = mempool_alloc(&state->pool, GFP_NOIO);
|
||||
out = page_address(outp);
|
||||
used_mempool = true;
|
||||
order = state->page_order;
|
||||
|
@@ -1220,7 +1215,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
|
|||
}
|
||||
|
||||
if (used_mempool)
|
||||
mempool_free(virt_to_page(out), state->pool);
|
||||
mempool_free(virt_to_page(out), &state->pool);
|
||||
else
|
||||
free_pages((unsigned long) out, order);
|
||||
|
||||
|
|
|
@@ -347,7 +347,7 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
|
|||
/* Sorting */
|
||||
|
||||
struct bset_sort_state {
|
||||
mempool_t *pool;
|
||||
mempool_t pool;
|
||||
|
||||
unsigned page_order;
|
||||
unsigned crit_factor;
|
||||
|
|
|
@@ -204,7 +204,7 @@ void bch_btree_node_read_done(struct btree *b)
|
|||
struct bset *i = btree_bset_first(b);
|
||||
struct btree_iter *iter;
|
||||
|
||||
iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
|
||||
iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
|
||||
iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
|
||||
iter->used = 0;
|
||||
|
||||
|
@@ -271,7 +271,7 @@ void bch_btree_node_read_done(struct btree *b)
|
|||
bch_bset_init_next(&b->keys, write_block(b),
|
||||
bset_magic(&b->c->sb));
|
||||
out:
|
||||
mempool_free(iter, b->c->fill_iter);
|
||||
mempool_free(iter, &b->c->fill_iter);
|
||||
return;
|
||||
err:
|
||||
set_btree_node_io_error(b);
|
||||
|
|
|
@@ -17,12 +17,12 @@
|
|||
void bch_bbio_free(struct bio *bio, struct cache_set *c)
|
||||
{
|
||||
struct bbio *b = container_of(bio, struct bbio, bio);
|
||||
mempool_free(b, c->bio_meta);
|
||||
mempool_free(b, &c->bio_meta);
|
||||
}
|
||||
|
||||
struct bio *bch_bbio_alloc(struct cache_set *c)
|
||||
{
|
||||
struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
|
||||
struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
|
||||
struct bio *bio = &b->bio;
|
||||
|
||||
bio_init(bio, bio->bi_inline_vecs, bucket_pages(c));
|
||||
|
|
|
@@ -213,7 +213,7 @@ static void bch_data_insert_start(struct closure *cl)
|
|||
do {
|
||||
unsigned i;
|
||||
struct bkey *k;
|
||||
struct bio_set *split = op->c->bio_split;
|
||||
struct bio_set *split = &op->c->bio_split;
|
||||
|
||||
/* 1 for the device pointer and 1 for the chksum */
|
||||
if (bch_keylist_realloc(&op->insert_keys,
|
||||
|
@@ -548,7 +548,7 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
|
|||
|
||||
n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
|
||||
KEY_OFFSET(k) - bio->bi_iter.bi_sector),
|
||||
GFP_NOIO, s->d->bio_split);
|
||||
GFP_NOIO, &s->d->bio_split);
|
||||
|
||||
bio_key = &container_of(n, struct bbio, bio)->key;
|
||||
bch_bkey_copy_single_ptr(bio_key, k, ptr);
|
||||
|
@@ -707,7 +707,7 @@ static void search_free(struct closure *cl)
|
|||
|
||||
bio_complete(s);
|
||||
closure_debug_destroy(cl);
|
||||
mempool_free(s, s->d->c->search);
|
||||
mempool_free(s, &s->d->c->search);
|
||||
}
|
||||
|
||||
static inline struct search *search_alloc(struct bio *bio,
|
||||
|
@@ -715,7 +715,7 @@ static inline struct search *search_alloc(struct bio *bio,
|
|||
{
|
||||
struct search *s;
|
||||
|
||||
s = mempool_alloc(d->c->search, GFP_NOIO);
|
||||
s = mempool_alloc(&d->c->search, GFP_NOIO);
|
||||
|
||||
closure_init(&s->cl, NULL);
|
||||
do_bio_hook(s, bio, request_endio);
|
||||
|
@@ -864,7 +864,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
|
|||
s->cache_missed = 1;
|
||||
|
||||
if (s->cache_miss || s->iop.bypass) {
|
||||
miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
|
||||
miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
|
||||
ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
|
||||
goto out_submit;
|
||||
}
|
||||
|
@@ -887,14 +887,14 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
|
|||
|
||||
s->iop.replace = true;
|
||||
|
||||
miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
|
||||
miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
|
||||
|
||||
/* btree_search_recurse()'s btree iterator is no good anymore */
|
||||
ret = miss == bio ? MAP_DONE : -EINTR;
|
||||
|
||||
cache_bio = bio_alloc_bioset(GFP_NOWAIT,
|
||||
DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
|
||||
dc->disk.bio_split);
|
||||
&dc->disk.bio_split);
|
||||
if (!cache_bio)
|
||||
goto out_submit;
|
||||
|
||||
|
@@ -1008,7 +1008,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
|
|||
struct bio *flush;
|
||||
|
||||
flush = bio_alloc_bioset(GFP_NOIO, 0,
|
||||
dc->disk.bio_split);
|
||||
&dc->disk.bio_split);
|
||||
if (!flush) {
|
||||
s->iop.status = BLK_STS_RESOURCE;
|
||||
goto insert_data;
|
||||
|
@@ -1021,7 +1021,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
|
|||
closure_bio_submit(s->iop.c, flush, cl);
|
||||
}
|
||||
} else {
|
||||
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
|
||||
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
|
||||
/* I/O request sent to backing device */
|
||||
bio->bi_end_io = backing_request_endio;
|
||||
closure_bio_submit(s->iop.c, bio, cl);
|
||||
|
|
|
@ -37,24 +37,6 @@ static const char invalid_uuid[] = {
|
|||
0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99
|
||||
};
|
||||
|
||||
/* Default is -1; we skip past it for struct cached_dev's cache mode */
|
||||
const char * const bch_cache_modes[] = {
|
||||
"default",
|
||||
"writethrough",
|
||||
"writeback",
|
||||
"writearound",
|
||||
"none",
|
||||
NULL
|
||||
};
|
||||
|
||||
/* Default is -1; we skip past it for stop_when_cache_set_failed */
|
||||
const char * const bch_stop_on_failure_modes[] = {
|
||||
"default",
|
||||
"auto",
|
||||
"always",
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct kobject *bcache_kobj;
|
||||
struct mutex bch_register_lock;
|
||||
LIST_HEAD(bch_cache_sets);
|
||||
|
@@ -654,6 +636,11 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
|
|||
unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct bcache_device *d = b->bd_disk->private_data;
|
||||
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
|
||||
|
||||
if (dc->io_disable)
|
||||
return -EIO;
|
||||
|
||||
return d->ioctl(d, mode, cmd, arg);
|
||||
}
|
||||
|
||||
|
@@ -766,8 +753,7 @@ static void bcache_device_free(struct bcache_device *d)
|
|||
put_disk(d->disk);
|
||||
}
|
||||
|
||||
if (d->bio_split)
|
||||
bioset_free(d->bio_split);
|
||||
bioset_exit(&d->bio_split);
|
||||
kvfree(d->full_dirty_stripes);
|
||||
kvfree(d->stripe_sectors_dirty);
|
||||
|
||||
|
@@ -809,9 +795,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
|
|||
if (idx < 0)
|
||||
return idx;
|
||||
|
||||
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
|
||||
BIOSET_NEED_BVECS |
|
||||
BIOSET_NEED_RESCUER)) ||
|
||||
if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
|
||||
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
|
||||
!(d->disk = alloc_disk(BCACHE_MINORS))) {
|
||||
ida_simple_remove(&bcache_device_idx, idx);
|
||||
return -ENOMEM;
|
||||
|
@@ -864,6 +849,44 @@ static void calc_cached_dev_sectors(struct cache_set *c)
|
|||
c->cached_dev_sectors = sectors;
|
||||
}
|
||||
|
||||
#define BACKING_DEV_OFFLINE_TIMEOUT 5
|
||||
static int cached_dev_status_update(void *arg)
|
||||
{
|
||||
struct cached_dev *dc = arg;
|
||||
struct request_queue *q;
|
||||
|
||||
/*
|
||||
* If this delayed worker is stopping outside, directly quit here.
|
||||
* dc->io_disable might be set via sysfs interface, so check it
|
||||
* here too.
|
||||
*/
|
||||
while (!kthread_should_stop() && !dc->io_disable) {
|
||||
q = bdev_get_queue(dc->bdev);
|
||||
if (blk_queue_dying(q))
|
||||
dc->offline_seconds++;
|
||||
else
|
||||
dc->offline_seconds = 0;
|
||||
|
||||
if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
|
||||
pr_err("%s: device offline for %d seconds",
|
||||
dc->backing_dev_name,
|
||||
BACKING_DEV_OFFLINE_TIMEOUT);
|
||||
pr_err("%s: disable I/O request due to backing "
|
||||
"device offline", dc->disk.name);
|
||||
dc->io_disable = true;
|
||||
/* let others know earlier that io_disable is true */
|
||||
smp_mb();
|
||||
bcache_device_stop(&dc->disk);
|
||||
break;
|
||||
}
|
||||
schedule_timeout_interruptible(HZ);
|
||||
}
|
||||
|
||||
wait_for_kthread_stop();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void bch_cached_dev_run(struct cached_dev *dc)
|
||||
{
|
||||
struct bcache_device *d = &dc->disk;
|
||||
|
@@ -906,6 +929,14 @@ void bch_cached_dev_run(struct cached_dev *dc)
|
|||
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
|
||||
sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
|
||||
pr_debug("error creating sysfs link");
|
||||
|
||||
dc->status_update_thread = kthread_run(cached_dev_status_update,
|
||||
dc, "bcache_status_update");
|
||||
if (IS_ERR(dc->status_update_thread)) {
|
||||
pr_warn("failed to create bcache_status_update kthread, "
|
||||
"continue to run without monitoring backing "
|
||||
"device status");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -1139,6 +1170,8 @@ static void cached_dev_free(struct closure *cl)
|
|||
kthread_stop(dc->writeback_thread);
|
||||
if (dc->writeback_write_wq)
|
||||
destroy_workqueue(dc->writeback_write_wq);
|
||||
if (!IS_ERR_OR_NULL(dc->status_update_thread))
|
||||
kthread_stop(dc->status_update_thread);
|
||||
|
||||
if (atomic_read(&dc->running))
|
||||
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
|
||||
|
@@ -1465,14 +1498,10 @@ static void cache_set_free(struct closure *cl)
|
|||
|
||||
if (c->moving_gc_wq)
|
||||
destroy_workqueue(c->moving_gc_wq);
|
||||
if (c->bio_split)
|
||||
bioset_free(c->bio_split);
|
||||
if (c->fill_iter)
|
||||
mempool_destroy(c->fill_iter);
|
||||
if (c->bio_meta)
|
||||
mempool_destroy(c->bio_meta);
|
||||
if (c->search)
|
||||
mempool_destroy(c->search);
|
||||
bioset_exit(&c->bio_split);
|
||||
mempool_exit(&c->fill_iter);
|
||||
mempool_exit(&c->bio_meta);
|
||||
mempool_exit(&c->search);
|
||||
kfree(c->devices);
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
@@ -1683,21 +1712,17 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
|
|||
INIT_LIST_HEAD(&c->btree_cache_freed);
|
||||
INIT_LIST_HEAD(&c->data_buckets);
|
||||
|
||||
c->search = mempool_create_slab_pool(32, bch_search_cache);
|
||||
if (!c->search)
|
||||
goto err;
|
||||
|
||||
iter_size = (sb->bucket_size / sb->block_size + 1) *
|
||||
sizeof(struct btree_iter_set);
|
||||
|
||||
if (!(c->devices = kzalloc(c->nr_uuids * sizeof(void *), GFP_KERNEL)) ||
|
||||
!(c->bio_meta = mempool_create_kmalloc_pool(2,
|
||||
sizeof(struct bbio) + sizeof(struct bio_vec) *
|
||||
bucket_pages(c))) ||
|
||||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
|
||||
!(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
|
||||
BIOSET_NEED_BVECS |
|
||||
BIOSET_NEED_RESCUER)) ||
|
||||
mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
|
||||
mempool_init_kmalloc_pool(&c->bio_meta, 2,
|
||||
sizeof(struct bbio) + sizeof(struct bio_vec) *
|
||||
bucket_pages(c)) ||
|
||||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
|
||||
bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
|
||||
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
|
||||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
|
||||
!(c->moving_gc_wq = alloc_workqueue("bcache_gc",
|
||||
WQ_MEM_RECLAIM, 0)) ||
|
||||