block: remove legacy IO schedulers
Retain the deadline documentation, as that carries over to mq-deadline as well.

Tested-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Parent: 404b8f5a03
Commit: f382fb0bce

Documentation/block/cfq-iosched.txt
@@ -1,291 +0,0 @@
CFQ (Complete Fairness Queueing)
================================

The main aim of the CFQ scheduler is to provide a fair allocation of the disk
I/O bandwidth for all the processes which request an I/O operation.

CFQ maintains a per-process queue for the processes which request I/O
operations (synchronous requests). In case of asynchronous requests, all the
requests from all the processes are batched together according to their
process's I/O priority.

CFQ ioscheduler tunables
========================

slice_idle
----------
This specifies how long CFQ should idle for the next request on certain cfq
queues (for sequential workloads) and service trees (for random workloads)
before the queue is expired and CFQ selects the next queue to dispatch from.

By default slice_idle is a non-zero value. That means by default we idle on
queues/service trees. This can be very helpful on highly seeky media like
single-spindle SATA/SAS disks where we can cut down on the overall number of
seeks and see improved throughput.

Setting slice_idle to 0 will remove all the idling at the queue/service tree
level and one should see an overall improved throughput on faster storage
devices like multiple SATA/SAS disks in a hardware RAID configuration. The
downside is that the isolation provided from WRITES also goes down and the
notion of IO priority becomes weaker.

So depending on storage and workload, it might be useful to set slice_idle=0.
In general, for SATA/SAS disks and software RAID of SATA/SAS disks, keeping
slice_idle enabled should be useful. For any configuration where there are
multiple spindles behind a single LUN (host-based hardware RAID controller or
storage arrays), setting slice_idle=0 might end up in better throughput and
acceptable latencies.
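
All of these tunables are per-device sysfs attributes. As a minimal sketch of
how slice_idle could be cleared from a program (assuming the legacy CFQ
attribute directory /sys/block/<dev>/queue/iosched/ and an example device
name; error handling kept to a minimum):

#include <stdio.h>

/* Write one of the legacy CFQ iosched tunables for a block device. */
static int set_iosched_tunable(const char *dev, const char *knob, int value)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/iosched/%s", dev, knob);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", value);
	return fclose(f);
}

int main(void)
{
	/* "sda" is only an example device name; adjust for the target system. */
	if (set_iosched_tunable("sda", "slice_idle", 0) != 0)
		perror("slice_idle");
	return 0;
}

The same helper can be pointed at any of the tunables described below.
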
back_seek_max
-------------
This specifies, in Kbytes, the maximum "distance" for backward seeking.
The distance is the amount of space from the current head location to the
sectors that lie behind it.

This parameter allows the scheduler to anticipate requests in the "backward"
direction and consider them as being the "next" if they are within this
distance from the current head location.

back_seek_penalty
-----------------
This parameter is used to compute the cost of backward seeking. If the
backward distance of a request is just 1/back_seek_penalty of the distance to
a "front" request, then the seeking cost of the two requests is considered
equivalent.

So the scheduler will not bias toward one or the other request (otherwise the
scheduler would bias toward the front request). The default value of
back_seek_penalty is 2.
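
For example, with the default back_seek_penalty of 2, a request 1024 KB behind
the head is costed the same as one 2048 KB ahead of it. A simplified
standalone sketch of the decision the two knobs describe (illustrative helper
only, not CFQ's actual code; the 16384 KB limit is just an example value):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative helper: should a backward request be preferred over a
 * forward one? Distances are measured from the current head position in KB.
 */
static bool prefer_backward(uint64_t dist_back_kb, uint64_t dist_front_kb,
			    uint64_t back_seek_max_kb,
			    unsigned int back_seek_penalty)
{
	if (dist_back_kb > back_seek_max_kb)
		return false;	/* too far behind the head: not considered */
	/* backward seeks are penalised before the comparison */
	return dist_back_kb * back_seek_penalty < dist_front_kb;
}

int main(void)
{
	printf("%d\n", prefer_backward(1024, 2048, 16384, 2)); /* 0: equal cost, front wins */
	printf("%d\n", prefer_backward(512, 2048, 16384, 2));  /* 1: backward request wins */
	return 0;
}
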
fifo_expire_async
-----------------
This parameter is used to set the timeout of asynchronous requests. The
default value is 248ms.

fifo_expire_sync
----------------
This parameter is used to set the timeout of synchronous requests. The
default value is 124ms. To favor synchronous requests over asynchronous ones,
this value should be decreased relative to fifo_expire_async.

group_idle
----------
This parameter forces idling at the CFQ group level instead of the CFQ queue
level. It was introduced after a bottleneck was observed in higher end
storage, caused by idling on a sequential queue and allowing dispatch from
only a single queue. The idea with this parameter is that it can be run with
slice_idle=0 and group_idle=8, so that idling does not happen on individual
queues in the group but happens overall on the group, and thus still keeps
the IO controller working.
Not idling on individual queues in the group will dispatch requests from
multiple queues in the group at the same time and achieve higher throughput
on higher end storage.

The default value for this parameter is 8ms.

low_latency
-----------
This parameter is used to enable/disable the low latency mode of the CFQ
scheduler. If enabled, CFQ tries to recompute the slice time for each process
based on the target_latency set for the system. This favors fairness over
throughput. Disabling low latency (setting it to 0) ignores target latency,
allowing each process in the system to get a full time slice.

By default low latency mode is enabled.

target_latency
--------------
This parameter is used to calculate the time slice for a process if cfq's
latency mode is enabled. It will ensure that sync requests have an estimated
latency. But if the sequential workload is higher (e.g. sequential reads),
then to meet the latency constraints, throughput may decrease because of the
smaller amount of time each process gets to issue I/O requests before the cfq
queue is switched.

Though this can be overcome by disabling the latency mode, it may increase
the read latency for some applications. This parameter allows changing
target_latency through the sysfs interface, which can provide a balance
between throughput and read latency.

The default value for target_latency is 300ms.

slice_async
-----------
This parameter is the same as slice_sync but for the asynchronous queue. The
default value is 40ms.

slice_async_rq
--------------
This parameter is used to limit the dispatching of asynchronous requests to
the device request queue within a queue's slice time. The maximum number of
requests that are allowed to be dispatched also depends upon the io priority.
The default value for this is 2.

slice_sync
----------
When a queue is selected for execution, the queue's IO requests are only
executed for a certain amount of time (time_slice) before switching to
another queue. This parameter is used to calculate the time slice of the
synchronous queue.

time_slice is computed using the following equation:
time_slice = slice_sync + (slice_sync/5 * (4 - prio)). To increase the
time_slice of the synchronous queue, increase the value of slice_sync. The
default value is 100ms.
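
As a worked example of the formula above, a small standalone sketch (the
helper name cfq_time_slice is illustrative, and prio is assumed to be the
queue's I/O priority in the 0-7 range):

#include <stdio.h>

/* time_slice = slice_sync + (slice_sync/5 * (4 - prio)), as documented above */
static int cfq_time_slice(int slice_sync_ms, int prio)
{
	return slice_sync_ms + (slice_sync_ms / 5) * (4 - prio);
}

int main(void)
{
	int prio;

	/*
	 * With the default slice_sync of 100ms:
	 *   prio 0 -> 180ms, prio 4 -> 100ms, prio 7 -> 40ms
	 */
	for (prio = 0; prio <= 7; prio++)
		printf("prio %d: %d ms\n", prio, cfq_time_slice(100, prio));
	return 0;
}
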
quantum
-------
This specifies the number of requests dispatched to the device queue. Within a
queue's time slice, a request will not be dispatched if the number of requests
in the device exceeds this parameter. This parameter is used for synchronous
requests.

In case of storage with several disks, this setting can limit the parallel
processing of requests. Therefore, increasing the value can improve
performance, although it can also cause the latency of some I/O to increase
due to the larger number of in-flight requests.

CFQ Group scheduling
====================

CFQ supports blkio cgroup and has "blkio." prefixed files in each
blkio cgroup directory. It is weight-based and there are four knobs
for configuration - weight[_device] and leaf_weight[_device].
Internal cgroup nodes (the ones with children) can also have tasks in
them, so the former two configure how much proportion the cgroup as a
whole is entitled to at its parent's level while the latter two
configure how much proportion the tasks in the cgroup have compared to
its direct children.

Another way to think about it is assuming that each internal node has
an implicit leaf child node which hosts all the tasks, whose weight is
configured by leaf_weight[_device]. Let's assume a blkio hierarchy
composed of five cgroups - root, A, B, AA and AB - with the following
weights where the names represent the hierarchy.

        weight leaf_weight
 root :  125    125
 A    :  500    750
 B    :  250    500
 AA   :  500    500
 AB   : 1000    500

root never has a parent, making its weight meaningless. For backward
compatibility, weight is always kept in sync with leaf_weight. B, AA
and AB have no children and thus their tasks have no child cgroups to
compete with. They always get 100% of what the cgroup won at the
parent level. Considering only the weights which matter, the hierarchy
looks like the following.

          root
       /    |    \
      A     B    leaf
     500   250   125
   /  |  \
  AA  AB  leaf
 500 1000  750

If all cgroups have active IOs and are competing with each other, disk
time will be distributed like the following (see the sketch after these
figures).

Distribution below root. The total active weight at this level is
A:500 + B:250 + root-leaf:125 = 875.

 root-leaf :   125 /  875      =~ 14%
 A         :   500 /  875      =~ 57%
 B(-leaf)  :   250 /  875      =~ 28%

A has children and further distributes its 57% among the children and
the implicit leaf node. The total active weight at this level is
AA:500 + AB:1000 + A-leaf:750 = 2250.

 A-leaf    : ( 750 / 2250) * A =~ 19%
 AA(-leaf) : ( 500 / 2250) * A =~ 12%
 AB(-leaf) : (1000 / 2250) * A =~ 25%
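
The percentages above follow directly from the weights. A small standalone
sketch that redoes the two-level arithmetic of this example (plain arithmetic
only, not blkio/CFQ code):

#include <stdio.h>

int main(void)
{
	/* weights from the example hierarchy above */
	const double root_leaf = 125, A = 500, B = 250;
	const double A_leaf = 750, AA = 500, AB = 1000;

	/* level below root: total active weight 125 + 500 + 250 = 875 */
	double top = root_leaf + A + B;
	double share_A = A / top;			/* ~57% */
	double share_B = B / top;			/* ~28% */
	double share_root_leaf = root_leaf / top;	/* ~14% */

	/* level below A: total active weight 750 + 500 + 1000 = 2250 */
	double sub = A_leaf + AA + AB;

	printf("root-leaf : %4.1f%%\n", 100 * share_root_leaf);
	printf("A         : %4.1f%%\n", 100 * share_A);
	printf("B(-leaf)  : %4.1f%%\n", 100 * share_B);
	printf("A-leaf    : %4.1f%%\n", 100 * share_A * (A_leaf / sub)); /* ~19% */
	printf("AA(-leaf) : %4.1f%%\n", 100 * share_A * (AA / sub));     /* ~12% */
	printf("AB(-leaf) : %4.1f%%\n", 100 * share_A * (AB / sub));     /* ~25% */
	return 0;
}
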
CFQ IOPS Mode for group scheduling
==================================
The basic CFQ design is to provide priority-based time slices. A higher
priority process gets a bigger time slice and a lower priority process gets a
smaller time slice. Measuring time becomes harder if the storage is fast and
supports NCQ, and it would be better to dispatch multiple requests from
multiple cfq queues in the request queue at a time. In such a scenario, it is
not possible to accurately measure the time consumed by a single queue.

What is possible, though, is to measure the number of requests dispatched
from a single queue and also allow dispatch from multiple cfq queues at the
same time. This effectively becomes fairness in terms of IOPS (IO operations
per second).

If one sets slice_idle=0 and the storage supports NCQ, CFQ internally switches
to IOPS mode and starts providing fairness in terms of the number of requests
dispatched. Note that this mode switching takes effect only for group
scheduling. For non-cgroup users nothing should change.

CFQ IO scheduler Idling Theory
==============================
Idling on a queue is primarily about waiting for the next request to come
on the same queue after completion of a request. In this process CFQ will not
dispatch requests from other cfq queues even if requests are pending there.

The rationale behind idling is that it can cut down on the number of seeks
on rotational media. For example, if a process is doing dependent
sequential reads (the next read comes only after completion of the previous
one), then not dispatching requests from other queues should help, as we
did not move the disk head and kept on dispatching sequential IO from
one queue.

CFQ has the following service trees, and the various queues are put on these
trees.

	sync-idle	sync-noidle	async

All cfq queues doing synchronous sequential IO go on the sync-idle tree.
On this tree we idle on each queue individually.

All synchronous non-sequential queues go on the sync-noidle tree. Also any
synchronous write request which is not marked with REQ_IDLE goes on this
service tree. On this tree we do not idle on individual queues; instead we
idle on the whole group of queues, i.e. on the tree. So if there are 4 queues
waiting for IO to dispatch, we will idle only once the last queue has
dispatched its IO and there is no more IO on this service tree.

All async writes go on the async service tree. There is no idling on async
queues.

CFQ has some optimizations for SSDs, and if it detects non-rotational
media which can support a higher queue depth (multiple requests in
flight at a time), then it cuts down on idling of individual queues and
all the queues move to the sync-noidle tree and only tree idling remains.
This tree idling provides isolation with buffered write queues on the async
tree.

FAQ
===
Q1. Why idle at all on queues not marked with REQ_IDLE?

A1. We only do tree idle (all queues on the sync-noidle tree) on queues not
    marked with REQ_IDLE. This helps in providing isolation with all the
    sync-idle queues. Otherwise, in the presence of many sequential readers,
    other synchronous IO might not get a fair share of the disk.

    For example, say there are 10 sequential readers doing IO and each gets a
    100ms slice. If a !REQ_IDLE request comes in, it will be scheduled
    roughly after 1 second. If after completion of the !REQ_IDLE request we
    do not idle, and after a couple of milliseconds another !REQ_IDLE
    request comes in, again it will be scheduled after 1 second. Repeat this
    and notice how a workload can lose its disk share and suffer due to
    multiple sequential readers.

    fsync can generate dependent IO, where a bunch of data is written in the
    context of fsync and later some journaling data is written. Journaling
    data comes in only after fsync has finished its IO (at least for ext4
    that seemed to be the case). Now if one decides not to idle on the fsync
    thread due to !REQ_IDLE, then the next journaling write will not get
    scheduled for another second. A process doing small fsyncs will suffer
    badly in the presence of multiple sequential readers.

    Hence doing tree idling on threads using the !REQ_IDLE flag on requests
    provides isolation from multiple sequential readers while at the same
    time we do not idle on individual threads.

Q2. When to specify REQ_IDLE?
A2. Whenever one is doing a synchronous write and expecting more writes to be
    dispatched from the same context soon, one should be able to specify
    REQ_IDLE on the writes, and that should probably work well for most
    cases.
block/Kconfig.iosched
@@ -3,67 +3,6 @@ if BLOCK

menu "IO Schedulers"

config IOSCHED_NOOP
	bool
	default y
	---help---
	  The no-op I/O scheduler is a minimal scheduler that does basic merging
	  and sorting. Its main uses include non-disk based block devices like
	  memory devices, and specialised software or hardware environments
	  that do their own scheduling and require only minimal assistance from
	  the kernel.

config IOSCHED_DEADLINE
	tristate "Deadline I/O scheduler"
	default y
	---help---
	  The deadline I/O scheduler is simple and compact. It will provide
	  CSCAN service with FIFO expiration of requests, switching to
	  a new point in the service tree and doing a batch of IO from there
	  in case of expiry.

config IOSCHED_CFQ
	tristate "CFQ I/O scheduler"
	default y
	---help---
	  The CFQ I/O scheduler tries to distribute bandwidth equally
	  among all processes in the system. It should provide a fair
	  and low latency working environment, suitable for both desktop
	  and server systems.

	  This is the default I/O scheduler.

config CFQ_GROUP_IOSCHED
	bool "CFQ Group Scheduling support"
	depends on IOSCHED_CFQ && BLK_CGROUP
	---help---
	  Enable group IO scheduling in CFQ.

choice

	prompt "Default I/O scheduler"
	default DEFAULT_CFQ
	help
	  Select the I/O scheduler which will be used by default for all
	  block devices.

	config DEFAULT_DEADLINE
		bool "Deadline" if IOSCHED_DEADLINE=y

	config DEFAULT_CFQ
		bool "CFQ" if IOSCHED_CFQ=y

	config DEFAULT_NOOP
		bool "No-op"

endchoice

config DEFAULT_IOSCHED
	string
	default "deadline" if DEFAULT_DEADLINE
	default "cfq" if DEFAULT_CFQ
	default "noop" if DEFAULT_NOOP

config MQ_IOSCHED_DEADLINE
	tristate "MQ deadline I/O scheduler"
	default y
block/Makefile
@@ -18,9 +18,6 @@ obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
block/cfq-iosched.c (4916 lines removed)
Diff not shown because of its large size.
block/deadline-iosched.c
@@ -1,560 +0,0 @@
/*
 * Deadline i/o scheduler.
 *
 * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
					   by the above parameters. For throughput. */

struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests (deadline_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	/*
	 * next in sort order. read, write or both are NULL
	 */
	struct request *next_rq[2];
	unsigned int batching;		/* number of sequential requests made */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;
};

static inline struct rb_root *
deadline_rb_root(struct deadline_data *dd, struct request *rq)
{
	return &dd->sort_list[rq_data_dir(rq)];
}

/*
 * get the request after `rq' in sector-sorted order
 */
static inline struct request *
deadline_latter_request(struct request *rq)
{
	struct rb_node *node = rb_next(&rq->rb_node);

	if (node)
		return rb_entry_rq(node);

	return NULL;
}

static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
	struct rb_root *root = deadline_rb_root(dd, rq);

	elv_rb_add(root, rq);
}

static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	if (dd->next_rq[data_dir] == rq)
		dd->next_rq[data_dir] = deadline_latter_request(rq);

	elv_rb_del(deadline_rb_root(dd, rq), rq);
}

/*
 * add rq to rbtree and fifo
 */
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int data_dir = rq_data_dir(rq);

	/*
	 * This may be a requeue of a write request that has locked its
	 * target zone. If it is the case, this releases the zone lock.
	 */
	blk_req_zone_write_unlock(rq);

	deadline_add_rq_rb(dd, rq);

	/*
	 * set expire time and add to fifo list
	 */
	rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	rq_fifo_clear(rq);
	deadline_del_rq_rb(dd, rq);
}

static enum elv_merge
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *__rq;

	/*
	 * check for front merge
	 */
	if (dd->front_merges) {
		sector_t sector = bio_end_sector(bio);

		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
		if (__rq) {
			BUG_ON(sector != blk_rq_pos(__rq));

			if (elv_bio_merge_ok(__rq, bio)) {
				*req = __rq;
				return ELEVATOR_FRONT_MERGE;
			}
		}
	}

	return ELEVATOR_NO_MERGE;
}

static void deadline_merged_request(struct request_queue *q,
				    struct request *req, enum elv_merge type)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(deadline_rb_root(dd, req), req);
		deadline_add_rq_rb(dd, req);
	}
}

static void
deadline_merged_requests(struct request_queue *q, struct request *req,
			 struct request *next)
{
	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before((unsigned long)next->fifo_time,
				(unsigned long)req->fifo_time)) {
			list_move(&req->queuelist, &next->queuelist);
			req->fifo_time = next->fifo_time;
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
	struct request_queue *q = rq->q;

	/*
	 * For a zoned block device, write requests must write lock their
	 * target zone.
	 */
	blk_req_zone_write_lock(rq);

	deadline_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	dd->next_rq[READ] = NULL;
	dd->next_rq[WRITE] = NULL;
	dd->next_rq[data_dir] = deadline_latter_request(rq);

	/*
	 * take it off the sort and fifo list, move
	 * to dispatch queue
	 */
	deadline_move_to_dispatch(dd, rq);
}

/*
 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);

	/*
	 * rq is expired!
	 */
	if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
		return 1;

	return 0;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * arrival ordered lists.
 */
static struct request *
deadline_fifo_request(struct deadline_data *dd, int data_dir)
{
	struct request *rq;

	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
		return NULL;

	if (list_empty(&dd->fifo_list[data_dir]))
		return NULL;

	rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
		return rq;

	/*
	 * Look for a write request that can be dispatched, that is one with
	 * an unlocked target zone.
	 */
	list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) {
		if (blk_req_can_dispatch_to_zone(rq))
			return rq;
	}

	return NULL;
}

/*
 * For the specified data direction, return the next request to dispatch using
 * sector position sorted lists.
 */
static struct request *
deadline_next_request(struct deadline_data *dd, int data_dir)
{
	struct request *rq;

	if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE))
		return NULL;

	rq = dd->next_rq[data_dir];
	if (!rq)
		return NULL;

	if (data_dir == READ || !blk_queue_is_zoned(rq->q))
		return rq;

	/*
	 * Look for a write request that can be dispatched, that is one with
	 * an unlocked target zone.
	 */
	while (rq) {
		if (blk_req_can_dispatch_to_zone(rq))
			return rq;
		rq = deadline_latter_request(rq);
	}

	return NULL;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
static int deadline_dispatch_requests(struct request_queue *q, int force)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int reads = !list_empty(&dd->fifo_list[READ]);
	const int writes = !list_empty(&dd->fifo_list[WRITE]);
	struct request *rq, *next_rq;
	int data_dir;

	/*
	 * batches are currently reads XOR writes
	 */
	rq = deadline_next_request(dd, WRITE);
	if (!rq)
		rq = deadline_next_request(dd, READ);

	if (rq && dd->batching < dd->fifo_batch)
		/* we have a next request are still entitled to batch */
		goto dispatch_request;

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

		if (deadline_fifo_request(dd, WRITE) &&
		    (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (writes) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return 0;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	next_rq = deadline_next_request(dd, data_dir);
	if (deadline_check_fifo(dd, data_dir) || !next_rq) {
		/*
		 * A deadline has expired, the last request was in the other
		 * direction, or we have run out of higher-sectored requests.
		 * Start again from the request with the earliest expiry time.
		 */
		rq = deadline_fifo_request(dd, data_dir);
	} else {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		rq = next_rq;
	}

	/*
	 * For a zoned block device, if we only have writes queued and none of
	 * them can be dispatched, rq will be NULL.
	 */
	if (!rq)
		return 0;

	dd->batching = 0;

dispatch_request:
	/*
	 * rq is the selected appropriate request.
	 */
	dd->batching++;
	deadline_move_request(dd, rq);

	return 1;
}

/*
 * For zoned block devices, write unlock the target zone of completed
 * write requests.
 */
static void
deadline_completed_request(struct request_queue *q, struct request *rq)
{
	blk_req_zone_write_unlock(rq);
}

static void deadline_exit_queue(struct elevator_queue *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
 */
static int deadline_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct deadline_data *dd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = dd;

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);
	return 0;
}

/*
 * sysfs parts below
 */

static ssize_t
deadline_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static void
deadline_var_store(int *var, const char *page)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data;							\
	deadline_var_store(&__data, (page));				\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return count;							\
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
};

static struct elevator_type iosched_deadline = {
	.ops.sq = {
		.elevator_merge_fn = deadline_merge,
		.elevator_merged_fn = deadline_merged_request,
		.elevator_merge_req_fn = deadline_merged_requests,
		.elevator_dispatch_fn = deadline_dispatch_requests,
		.elevator_completed_req_fn = deadline_completed_request,
		.elevator_add_req_fn = deadline_add_request,
		.elevator_former_req_fn = elv_rb_former_request,
		.elevator_latter_req_fn = elv_rb_latter_request,
		.elevator_init_fn = deadline_init_queue,
		.elevator_exit_fn = deadline_exit_queue,
	},

	.elevator_attrs = deadline_attrs,
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
{
	return elv_register(&iosched_deadline);
}

static void __exit deadline_exit(void)
{
	elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");
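
For reference, each SHOW_FUNCTION/STORE_FUNCTION instantiation in the file
above generates one sysfs handler for a tunable. The first one,
deadline_read_expire_show, expands to roughly the following (shown only to
illustrate the macro pattern; it is not a separate part of the file):

/* result of SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1) */
static ssize_t deadline_read_expire_show(struct elevator_queue *e, char *page)
{
	struct deadline_data *dd = e->elevator_data;
	int __data = dd->fifo_expire[READ];	/* __VAR */
	if (1)					/* __CONV: convert jiffies to ms */
		__data = jiffies_to_msecs(__data);
	return deadline_var_show(__data, (page));
}
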
block/elevator.c
@@ -225,8 +225,6 @@ int elevator_init(struct request_queue *q)
				chosen_elevator);
	}

	if (!e)
		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
	if (!e) {
		printk(KERN_ERR
			"Default I/O scheduler not found. Using noop.\n");
@@ -356,68 +354,6 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector)
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q. Queue lock must be held on
 * entry. rq is sort instead into the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (req_op(rq) != req_op(pos))
			break;
		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
			break;
		if (blk_rq_pos(rq) >= boundary) {
			if (blk_rq_pos(pos) < boundary)
				continue;
		} else {
			if (blk_rq_pos(pos) >= boundary)
				break;
		}
		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q. Queue lock must be held on
 * entry. rq is added to the back of the dispatch queue. To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);

enum elv_merge elv_merge(struct request_queue *q, struct request **req,
		struct bio *bio)
{
@@ -881,12 +817,6 @@ int elv_register(struct elevator_type *e)
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	/* print pretty message */
	if (elevator_match(e, chosen_elevator) ||
	    (!*chosen_elevator &&
	     elevator_match(e, CONFIG_DEFAULT_IOSCHED)))
		def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
		def);
	return 0;
block/noop-iosched.c
@@ -1,124 +0,0 @@
/*
 * elevator noop
 */
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>

struct noop_data {
	struct list_head queue;
};

static void noop_merged_requests(struct request_queue *q, struct request *rq,
				 struct request *next)
{
	list_del_init(&next->queuelist);
}

static int noop_dispatch(struct request_queue *q, int force)
{
	struct noop_data *nd = q->elevator->elevator_data;
	struct request *rq;

	rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
	if (rq) {
		list_del_init(&rq->queuelist);
		elv_dispatch_sort(q, rq);
		return 1;
	}
	return 0;
}

static void noop_add_request(struct request_queue *q, struct request *rq)
{
	struct noop_data *nd = q->elevator->elevator_data;

	list_add_tail(&rq->queuelist, &nd->queue);
}

static struct request *
noop_former_request(struct request_queue *q, struct request *rq)
{
	struct noop_data *nd = q->elevator->elevator_data;

	if (rq->queuelist.prev == &nd->queue)
		return NULL;
	return list_prev_entry(rq, queuelist);
}

static struct request *
noop_latter_request(struct request_queue *q, struct request *rq)
{
	struct noop_data *nd = q->elevator->elevator_data;

	if (rq->queuelist.next == &nd->queue)
		return NULL;
	return list_next_entry(rq, queuelist);
}

static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct noop_data *nd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
	if (!nd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = nd;

	INIT_LIST_HEAD(&nd->queue);

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);
	return 0;
}

static void noop_exit_queue(struct elevator_queue *e)
{
	struct noop_data *nd = e->elevator_data;

	BUG_ON(!list_empty(&nd->queue));
	kfree(nd);
}

static struct elevator_type elevator_noop = {
	.ops.sq = {
		.elevator_merge_req_fn = noop_merged_requests,
		.elevator_dispatch_fn = noop_dispatch,
		.elevator_add_req_fn = noop_add_request,
		.elevator_former_req_fn = noop_former_request,
		.elevator_latter_req_fn = noop_latter_request,
		.elevator_init_fn = noop_init_queue,
		.elevator_exit_fn = noop_exit_queue,
	},
	.elevator_name = "noop",
	.elevator_owner = THIS_MODULE,
};

static int __init noop_init(void)
{
	return elv_register(&elevator_noop);
}

static void __exit noop_exit(void)
{
	elv_unregister(&elevator_noop);
}

module_init(noop_init);
module_exit(noop_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("No-op IO scheduler");