From 488991e28e55b4fbca8067edf0259f69d1a6f92c Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Fri, 29 Jan 2010 09:04:08 +0100 Subject: [PATCH] block: Added in stricter no merge semantics for block I/O Updated 'nomerges' tunable to accept a value of '2' - indicating that _no_ merges at all are to be attempted (not even the simple one-hit cache). The following table illustrates the additional benefit - 5 minute runs of a random I/O load were applied to a dozen devices on a 16-way x86_64 system. nomerges Throughput %System Improvement (tput / %sys) -------- ------------ ----------- ------------------------- 0 12.45 MB/sec 0.669365609 1 12.50 MB/sec 0.641519199 0.40% / 2.71% 2 12.52 MB/sec 0.639849750 0.56% / 2.96% Signed-off-by: Alan D. Brunelle Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 14 ++++++++++++++ Documentation/block/queue-sysfs.txt | 10 +++++----- block/blk-sysfs.c | 11 +++++++---- block/elevator.c | 11 ++++++++++- include/linux/blkdev.h | 3 +++ 5 files changed, 39 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index d2f90334bb93..4873c759d535 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -128,3 +128,17 @@ Description: preferred request size for workloads where sustained throughput is desired. If no optimal I/O size is reported this file contains 0. + +What: /sys/block//queue/nomerges +Date: January 2010 +Contact: +Description: + Standard I/O elevator operations include attempts to + merge contiguous I/Os. For known random I/O loads these + attempts will always fail and result in extra cycles + being spent in the kernel. This allows one to turn off + this behavior on one of two ways: When set to 1, complex + merge checks are disabled, but the simple one-shot merges + with the previous I/O request are enabled. When set to 2, + all merge tries are disabled. The default value is 0 - + which enables all types of merge tries. diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index e164403f60e1..f65274081c8d 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -25,11 +25,11 @@ size allowed by the hardware. nomerges (RW) ------------- -This enables the user to disable the lookup logic involved with IO merging -requests in the block layer. Merging may still occur through a direct -1-hit cache, since that comes for (almost) free. The IO scheduler will not -waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults -to 0, enabling all merges. +This enables the user to disable the lookup logic involved with IO +merging requests in the block layer. By default (0) all merges are +enabled. When set to 1 only simple one-hit merges will be tried. When +set to 2 no merge algorithms will be tried (including one-hit or more +complex tree/hash lookups). nr_requests (RW) ---------------- diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8606c9543fdd..e85442415db3 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { - return queue_var_show(blk_queue_nomerges(q), page); + return queue_var_show((blk_queue_nomerges(q) << 1) | + blk_queue_noxmerges(q), page); } static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, @@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&nm, page, count); spin_lock_irq(q->queue_lock); - if (nm) + queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); + if (nm == 2) queue_flag_set(QUEUE_FLAG_NOMERGES, q); - else - queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + else if (nm) + queue_flag_set(QUEUE_FLAG_NOXMERGES, q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/elevator.c b/block/elevator.c index 9ad5ccc4c5ee..ee3a883840f2 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -473,6 +473,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) struct request *__rq; int ret; + /* + * Levels of merges: + * nomerges: No merges at all attempted + * noxmerges: Only simple one-hit cache try + * merges: All merge tries attempted + */ + if (blk_queue_nomerges(q)) + return ELEVATOR_NO_MERGE; + /* * First try one-hit cache. */ @@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) } } - if (blk_queue_nomerges(q)) + if (blk_queue_noxmerges(q)) return ELEVATOR_NO_MERGE; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ffb13ad35716..f71f5c58620c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -463,6 +463,7 @@ struct request_queue #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_CQ 16 /* hardware does queuing */ #define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 18 /* No extended merges */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -589,6 +590,8 @@ enum { #define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) +#define blk_queue_noxmerges(q) \ + test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq)