diff --git a/MAINTAINERS b/MAINTAINERS index 28e1c7b9244a..f0cd5a3f6de6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -501,7 +501,7 @@ S: Maintained BLOCK LAYER P: Jens Axboe -M: axboe@suse.de +M: axboe@kernel.dk L: linux-kernel@vger.kernel.org T: git kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git S: Maintained @@ -1380,7 +1380,7 @@ S: Maintained IDE/ATAPI CDROM DRIVER P: Jens Axboe -M: axboe@suse.de +M: axboe@kernel.dk L: linux-kernel@vger.kernel.org W: http://www.kernel.dk S: Maintained @@ -2531,7 +2531,7 @@ S: Maintained SCSI CDROM DRIVER P: Jens Axboe -M: axboe@suse.de +M: axboe@kernel.dk L: linux-scsi@vger.kernel.org W: http://www.kernel.dk S: Maintained @@ -2976,7 +2976,7 @@ S: Maintained UNIFORM CDROM DRIVER P: Jens Axboe -M: axboe@suse.de +M: axboe@kernel.dk L: linux-kernel@vger.kernel.org W: http://www.kernel.dk S: Maintained diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index 477c5334ec1b..50c17eaa7f25 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -42,6 +42,8 @@ #include "signal-common.h" +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + /* * Including would give use the 64-bit syscall numbers ... */ @@ -81,8 +83,6 @@ struct rt_sigframe_n32 { #endif }; -extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat); - save_static_function(sysn32_rt_sigsuspend); __attribute_used__ noinline static int _sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 5fa4c8e258a4..fda4a3940698 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -981,8 +981,6 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req) __u64 offset; int len; - if(req->rq_status == RQ_INACTIVE) return(1); - /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", diff --git a/block/Kconfig b/block/Kconfig index b6f5f0a79655..9af6c614dfde 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -1,6 +1,24 @@ # # Block layer core configuration # +config BLOCK + bool "Enable the block layer" + default y + help + This permits the block layer to be removed from the kernel if it's not + needed (on some embedded devices for example). If this option is + disabled, then blockdev files will become unusable and some + filesystems (such as ext3) will become unavailable. + + This option will also disable SCSI character devices and USB storage + since they make use of various block layer definitions and + facilities. + + Say Y here unless you know you really don't want to mount disks and + suchlike. + +if BLOCK + #XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64 #for instance. config LBD @@ -33,4 +51,6 @@ config LSF If unsure, say Y. +endif + source block/Kconfig.iosched diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 48d090e266fc..903f0d3b6852 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -1,3 +1,4 @@ +if BLOCK menu "IO Schedulers" @@ -67,3 +68,5 @@ config DEFAULT_IOSCHED default "noop" if DEFAULT_NOOP endmenu + +endif diff --git a/block/Makefile b/block/Makefile index c05de0e0037f..4b84d0d5947b 100644 --- a/block/Makefile +++ b/block/Makefile @@ -2,7 +2,7 @@ # Makefile for the kernel block layer # -obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o +obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o diff --git a/block/as-iosched.c b/block/as-iosched.c index 5da56d48fbd3..165509e8659e 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1,7 +1,7 @@ /* * Anticipatory & deadline i/o scheduler. * - * Copyright (C) 2002 Jens Axboe + * Copyright (C) 2002 Jens Axboe * Nick Piggin * */ @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -93,9 +92,8 @@ struct as_data { struct rb_root sort_list[2]; struct list_head fifo_list[2]; - struct as_rq *next_arq[2]; /* next in sort order */ + struct request *next_rq[2]; /* next in sort order */ sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ - struct hlist_head *hash; /* request hash */ unsigned long exit_prob; /* probability a task will exit while being waited on */ @@ -115,7 +113,6 @@ struct as_data { int write_batch_count; /* max # of reqs in a write batch */ int current_write_count; /* how many requests left this batch */ int write_batch_idled; /* has the write batch gone idle? */ - mempool_t *arq_pool; enum anticipation_status antic_status; unsigned long antic_start; /* jiffies: when it started */ @@ -133,8 +130,6 @@ struct as_data { unsigned long antic_expire; }; -#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo) - /* * per-request data. */ @@ -150,40 +145,14 @@ enum arq_state { AS_RQ_POSTSCHED, /* when they shouldn't be */ }; -struct as_rq { - /* - * rbtree index, key is the starting offset - */ - struct rb_node rb_node; - sector_t rb_key; +#define RQ_IOC(rq) ((struct io_context *) (rq)->elevator_private) +#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) +#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) - struct request *request; - - struct io_context *io_context; /* The submitting task */ - - /* - * request hash, key is the ending offset (for back merge lookup) - */ - struct hlist_node hash; - - /* - * expire fifo - */ - struct list_head fifo; - unsigned long expires; - - unsigned int is_sync; - enum arq_state state; -}; - -#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private) - -static kmem_cache_t *arq_pool; - -static atomic_t ioc_count = ATOMIC_INIT(0); +static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; -static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); +static void as_move_to_dispatch(struct as_data *ad, struct request *rq); static void as_antic_stop(struct as_data *ad); /* @@ -194,7 +163,8 @@ static void as_antic_stop(struct as_data *ad); static void free_as_io_context(struct as_io_context *aic) { kfree(aic); - if (atomic_dec_and_test(&ioc_count) && ioc_gone) + elv_ioc_count_dec(ioc_count); + if (ioc_gone && !elv_ioc_count_read(ioc_count)) complete(ioc_gone); } @@ -230,7 +200,7 @@ static struct as_io_context *alloc_as_io_context(void) ret->seek_total = 0; ret->seek_samples = 0; ret->seek_mean = 0; - atomic_inc(&ioc_count); + elv_ioc_count_inc(ioc_count); } return ret; @@ -240,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void) * If the current task has no AS IO context then create one and initialise it. * Then take a ref on the task's io context and return it. */ -static struct io_context *as_get_io_context(void) +static struct io_context *as_get_io_context(int node) { - struct io_context *ioc = get_io_context(GFP_ATOMIC); + struct io_context *ioc = get_io_context(GFP_ATOMIC, node); if (ioc && !ioc->aic) { ioc->aic = alloc_as_io_context(); if (!ioc->aic) { @@ -253,194 +223,43 @@ static struct io_context *as_get_io_context(void) return ioc; } -static void as_put_io_context(struct as_rq *arq) +static void as_put_io_context(struct request *rq) { struct as_io_context *aic; - if (unlikely(!arq->io_context)) + if (unlikely(!RQ_IOC(rq))) return; - aic = arq->io_context->aic; + aic = RQ_IOC(rq)->aic; - if (arq->is_sync == REQ_SYNC && aic) { + if (rq_is_sync(rq) && aic) { spin_lock(&aic->lock); set_bit(AS_TASK_IORUNNING, &aic->state); aic->last_end_request = jiffies; spin_unlock(&aic->lock); } - put_io_context(arq->io_context); -} - -/* - * the back merge hash support functions - */ -static const int as_hash_shift = 6; -#define AS_HASH_BLOCK(sec) ((sec) >> 3) -#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift)) -#define AS_HASH_ENTRIES (1 << as_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) - -static inline void __as_del_arq_hash(struct as_rq *arq) -{ - hlist_del_init(&arq->hash); -} - -static inline void as_del_arq_hash(struct as_rq *arq) -{ - if (!hlist_unhashed(&arq->hash)) - __as_del_arq_hash(arq); -} - -static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - - BUG_ON(!hlist_unhashed(&arq->hash)); - - hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]); -} - -/* - * move hot entry to front of chain - */ -static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))]; - - if (hlist_unhashed(&arq->hash)) { - WARN_ON(1); - return; - } - - if (&arq->hash != head->first) { - hlist_del(&arq->hash); - hlist_add_head(&arq->hash, head); - } -} - -static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) -{ - struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)]; - struct hlist_node *entry, *next; - struct as_rq *arq; - - hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) { - struct request *__rq = arq->request; - - BUG_ON(hlist_unhashed(&arq->hash)); - - if (!rq_mergeable(__rq)) { - as_del_arq_hash(arq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; + put_io_context(RQ_IOC(rq)); } /* * rb tree support functions */ -#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) -#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync]) -#define rq_rb_key(rq) (rq)->sector +#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))]) -/* - * as_find_first_arq finds the first (lowest sector numbered) request - * for the specified data_dir. Used to sweep back to the start of the disk - * (1-way elevator) after we process the last (highest sector) request. - */ -static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) +static void as_add_rq_rb(struct as_data *ad, struct request *rq) { - struct rb_node *n = ad->sort_list[data_dir].rb_node; + struct request *alias; - if (n == NULL) - return NULL; - - for (;;) { - if (n->rb_left == NULL) - return rb_entry_arq(n); - - n = n->rb_left; - } -} - -/* - * Add the request to the rb tree if it is unique. If there is an alias (an - * existing request against the same sector), which can happen when using - * direct IO, then return the alias. - */ -static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) -{ - struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; - struct rb_node *parent = NULL; - struct as_rq *__arq; - struct request *rq = arq->request; - - arq->rb_key = rq_rb_key(rq); - - while (*p) { - parent = *p; - __arq = rb_entry_arq(parent); - - if (arq->rb_key < __arq->rb_key) - p = &(*p)->rb_left; - else if (arq->rb_key > __arq->rb_key) - p = &(*p)->rb_right; - else - return __arq; - } - - rb_link_node(&arq->rb_node, parent, p); - rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); - - return NULL; -} - -static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) -{ - struct as_rq *alias; - - while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { + while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) { as_move_to_dispatch(ad, alias); as_antic_stop(ad); } } -static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) +static inline void as_del_rq_rb(struct as_data *ad, struct request *rq) { - if (!RB_EMPTY_NODE(&arq->rb_node)) { - WARN_ON(1); - return; - } - - rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); - RB_CLEAR_NODE(&arq->rb_node); -} - -static struct request * -as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) -{ - struct rb_node *n = ad->sort_list[data_dir].rb_node; - struct as_rq *arq; - - while (n) { - arq = rb_entry_arq(n); - - if (sector < arq->rb_key) - n = n->rb_left; - else if (sector > arq->rb_key) - n = n->rb_right; - else - return arq->request; - } - - return NULL; + elv_rb_del(RQ_RB_ROOT(ad, rq), rq); } /* @@ -458,26 +277,26 @@ as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) * as_choose_req selects the preferred one of two requests of the same data_dir * ignoring time - eg. timeouts, which is the job of as_dispatch_request */ -static struct as_rq * -as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) +static struct request * +as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2) { int data_dir; sector_t last, s1, s2, d1, d2; int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ const sector_t maxback = MAXBACK; - if (arq1 == NULL || arq1 == arq2) - return arq2; - if (arq2 == NULL) - return arq1; + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; - data_dir = arq1->is_sync; + data_dir = rq_is_sync(rq1); last = ad->last_sector[data_dir]; - s1 = arq1->request->sector; - s2 = arq2->request->sector; + s1 = rq1->sector; + s2 = rq2->sector; - BUG_ON(data_dir != arq2->is_sync); + BUG_ON(data_dir != rq_is_sync(rq2)); /* * Strict one way elevator _except_ in the case where we allow @@ -504,61 +323,58 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) /* Found required data */ if (!r1_wrap && r2_wrap) - return arq1; + return rq1; else if (!r2_wrap && r1_wrap) - return arq2; + return rq2; else if (r1_wrap && r2_wrap) { /* both behind the head */ if (s1 <= s2) - return arq1; + return rq1; else - return arq2; + return rq2; } /* Both requests in front of the head */ if (d1 < d2) - return arq1; + return rq1; else if (d2 < d1) - return arq2; + return rq2; else { if (s1 >= s2) - return arq1; + return rq1; else - return arq2; + return rq2; } } /* - * as_find_next_arq finds the next request after @prev in elevator order. + * as_find_next_rq finds the next request after @prev in elevator order. * this with as_choose_req form the basis for how the scheduler chooses * what request to process next. Anticipation works on top of this. */ -static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last) +static struct request * +as_find_next_rq(struct as_data *ad, struct request *last) { - const int data_dir = last->is_sync; - struct as_rq *ret; struct rb_node *rbnext = rb_next(&last->rb_node); struct rb_node *rbprev = rb_prev(&last->rb_node); - struct as_rq *arq_next, *arq_prev; + struct request *next = NULL, *prev = NULL; - BUG_ON(!RB_EMPTY_NODE(&last->rb_node)); + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); if (rbprev) - arq_prev = rb_entry_arq(rbprev); - else - arq_prev = NULL; + prev = rb_entry_rq(rbprev); if (rbnext) - arq_next = rb_entry_arq(rbnext); + next = rb_entry_rq(rbnext); else { - arq_next = as_find_first_arq(ad, data_dir); - if (arq_next == last) - arq_next = NULL; + const int data_dir = rq_is_sync(last); + + rbnext = rb_first(&ad->sort_list[data_dir]); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); } - ret = as_choose_req(ad, arq_next, arq_prev); - - return ret; + return as_choose_req(ad, next, prev); } /* @@ -712,8 +528,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - int data_dir = arq->is_sync; + int data_dir = rq_is_sync(rq); unsigned long thinktime = 0; sector_t seek_dist; @@ -752,11 +567,11 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, * previous one issued. */ static int as_close_req(struct as_data *ad, struct as_io_context *aic, - struct as_rq *arq) + struct request *rq) { unsigned long delay; /* milliseconds */ sector_t last = ad->last_sector[ad->batch_data_dir]; - sector_t next = arq->request->sector; + sector_t next = rq->sector; sector_t delta; /* acceptable close offset (in sectors) */ sector_t s; @@ -813,7 +628,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic, * * If this task has queued some other IO, do not enter enticipation. */ -static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) +static int as_can_break_anticipation(struct as_data *ad, struct request *rq) { struct io_context *ioc; struct as_io_context *aic; @@ -821,7 +636,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) ioc = ad->io_context; BUG_ON(!ioc); - if (arq && ioc == arq->io_context) { + if (rq && ioc == RQ_IOC(rq)) { /* request from same process */ return 1; } @@ -848,7 +663,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) return 1; } - if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) { + if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) { /* * Found a close request that is not one of ours. * @@ -864,7 +679,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) ad->exit_no_coop = (7*ad->exit_no_coop)/8; } - as_update_iohist(ad, aic, arq->request); + as_update_iohist(ad, aic, rq); return 1; } @@ -891,10 +706,10 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) } /* - * as_can_anticipate indicates whether we should either run arq + * as_can_anticipate indicates whether we should either run rq * or keep anticipating a better request. */ -static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) +static int as_can_anticipate(struct as_data *ad, struct request *rq) { if (!ad->io_context) /* @@ -908,7 +723,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) */ return 0; - if (as_can_break_anticipation(ad, arq)) + if (as_can_break_anticipation(ad, rq)) /* * This request is a good candidate. Don't keep anticipating, * run it. @@ -926,16 +741,16 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) } /* - * as_update_arq must be called whenever a request (arq) is added to + * as_update_rq must be called whenever a request (rq) is added to * the sort_list. This function keeps caches up to date, and checks if the * request might be one we are "anticipating" */ -static void as_update_arq(struct as_data *ad, struct as_rq *arq) +static void as_update_rq(struct as_data *ad, struct request *rq) { - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); - /* keep the next_arq cache up to date */ - ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); + /* keep the next_rq cache up to date */ + ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]); /* * have we been anticipating this request? @@ -944,7 +759,7 @@ static void as_update_arq(struct as_data *ad, struct as_rq *arq) */ if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - if (as_can_break_anticipation(ad, arq)) + if (as_can_break_anticipation(ad, rq)) as_antic_stop(ad); } } @@ -984,12 +799,11 @@ static void update_write_batch(struct as_data *ad) static void as_completed_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); WARN_ON(!list_empty(&rq->queuelist)); - if (arq->state != AS_RQ_REMOVED) { - printk("arq->state %d\n", arq->state); + if (RQ_STATE(rq) != AS_RQ_REMOVED) { + printk("rq->state %d\n", RQ_STATE(rq)); WARN_ON(1); goto out; } @@ -1009,14 +823,14 @@ static void as_completed_request(request_queue_t *q, struct request *rq) * actually serviced. This should help devices with big TCQ windows * and writeback caches */ - if (ad->new_batch && ad->batch_data_dir == arq->is_sync) { + if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) { update_write_batch(ad); ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; ad->new_batch = 0; } - if (ad->io_context == arq->io_context && ad->io_context) { + if (ad->io_context == RQ_IOC(rq) && ad->io_context) { ad->antic_start = jiffies; ad->ioc_finished = 1; if (ad->antic_status == ANTIC_WAIT_REQ) { @@ -1028,9 +842,9 @@ static void as_completed_request(request_queue_t *q, struct request *rq) } } - as_put_io_context(arq); + as_put_io_context(rq); out: - arq->state = AS_RQ_POSTSCHED; + RQ_SET_STATE(rq, AS_RQ_POSTSCHED); } /* @@ -1041,27 +855,27 @@ out: */ static void as_remove_queued_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); struct as_data *ad = q->elevator->elevator_data; + struct io_context *ioc; - WARN_ON(arq->state != AS_RQ_QUEUED); + WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED); - if (arq->io_context && arq->io_context->aic) { - BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); - atomic_dec(&arq->io_context->aic->nr_queued); + ioc = RQ_IOC(rq); + if (ioc && ioc->aic) { + BUG_ON(!atomic_read(&ioc->aic->nr_queued)); + atomic_dec(&ioc->aic->nr_queued); } /* - * Update the "next_arq" cache if we are about to remove its + * Update the "next_rq" cache if we are about to remove its * entry */ - if (ad->next_arq[data_dir] == arq) - ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + if (ad->next_rq[data_dir] == rq) + ad->next_rq[data_dir] = as_find_next_rq(ad, rq); - list_del_init(&arq->fifo); - as_del_arq_hash(arq); - as_del_arq_rb(ad, arq); + rq_fifo_clear(rq); + as_del_rq_rb(ad, rq); } /* @@ -1074,7 +888,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) */ static int as_fifo_expired(struct as_data *ad, int adir) { - struct as_rq *arq; + struct request *rq; long delta_jif; delta_jif = jiffies - ad->last_check_fifo[adir]; @@ -1088,9 +902,9 @@ static int as_fifo_expired(struct as_data *ad, int adir) if (list_empty(&ad->fifo_list[adir])) return 0; - arq = list_entry_fifo(ad->fifo_list[adir].next); + rq = rq_entry_fifo(ad->fifo_list[adir].next); - return time_after(jiffies, arq->expires); + return time_after(jiffies, rq_fifo_time(rq)); } /* @@ -1113,25 +927,25 @@ static inline int as_batch_expired(struct as_data *ad) /* * move an entry to dispatch queue */ -static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) +static void as_move_to_dispatch(struct as_data *ad, struct request *rq) { - struct request *rq = arq->request; - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); - BUG_ON(!RB_EMPTY_NODE(&arq->rb_node)); + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); as_antic_stop(ad); ad->antic_status = ANTIC_OFF; /* * This has to be set in order to be correctly updated by - * as_find_next_arq + * as_find_next_rq */ ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; if (data_dir == REQ_SYNC) { + struct io_context *ioc = RQ_IOC(rq); /* In case we have to anticipate after this */ - copy_io_context(&ad->io_context, &arq->io_context); + copy_io_context(&ad->io_context, &ioc); } else { if (ad->io_context) { put_io_context(ad->io_context); @@ -1143,19 +957,19 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) } ad->ioc_finished = 0; - ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + ad->next_rq[data_dir] = as_find_next_rq(ad, rq); /* * take it off the sort and fifo list, add to dispatch queue */ as_remove_queued_request(ad->q, rq); - WARN_ON(arq->state != AS_RQ_QUEUED); + WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED); elv_dispatch_sort(ad->q, rq); - arq->state = AS_RQ_DISPATCHED; - if (arq->io_context && arq->io_context->aic) - atomic_inc(&arq->io_context->aic->nr_dispatched); + RQ_SET_STATE(rq, AS_RQ_DISPATCHED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched); ad->nr_dispatched++; } @@ -1167,9 +981,9 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) static int as_dispatch_request(request_queue_t *q, int force) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq; const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); + struct request *rq; if (unlikely(force)) { /* @@ -1185,14 +999,14 @@ static int as_dispatch_request(request_queue_t *q, int force) ad->changed_batch = 0; ad->new_batch = 0; - while (ad->next_arq[REQ_SYNC]) { - as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); + while (ad->next_rq[REQ_SYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]); dispatched++; } ad->last_check_fifo[REQ_SYNC] = jiffies; - while (ad->next_arq[REQ_ASYNC]) { - as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); + while (ad->next_rq[REQ_ASYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]); dispatched++; } ad->last_check_fifo[REQ_ASYNC] = jiffies; @@ -1216,19 +1030,19 @@ static int as_dispatch_request(request_queue_t *q, int force) /* * batch is still running or no reads or no writes */ - arq = ad->next_arq[ad->batch_data_dir]; + rq = ad->next_rq[ad->batch_data_dir]; if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { if (as_fifo_expired(ad, REQ_SYNC)) goto fifo_expired; - if (as_can_anticipate(ad, arq)) { + if (as_can_anticipate(ad, rq)) { as_antic_waitreq(ad); return 0; } } - if (arq) { + if (rq) { /* we have a "next request" */ if (reads && !writes) ad->current_batch_expires = @@ -1256,7 +1070,7 @@ static int as_dispatch_request(request_queue_t *q, int force) ad->changed_batch = 1; } ad->batch_data_dir = REQ_SYNC; - arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); + rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next); ad->last_check_fifo[ad->batch_data_dir] = jiffies; goto dispatch_request; } @@ -1282,7 +1096,7 @@ dispatch_writes: ad->batch_data_dir = REQ_ASYNC; ad->current_write_count = ad->write_batch_count; ad->write_batch_idled = 0; - arq = ad->next_arq[ad->batch_data_dir]; + rq = ad->next_rq[ad->batch_data_dir]; goto dispatch_request; } @@ -1296,8 +1110,7 @@ dispatch_request: if (as_fifo_expired(ad, ad->batch_data_dir)) { fifo_expired: - arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); - BUG_ON(arq == NULL); + rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); } if (ad->changed_batch) { @@ -1316,70 +1129,58 @@ fifo_expired: } /* - * arq is the selected appropriate request. + * rq is the selected appropriate request. */ - as_move_to_dispatch(ad, arq); + as_move_to_dispatch(ad, rq); return 1; } /* - * add arq to rbtree and fifo + * add rq to rbtree and fifo */ static void as_add_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); int data_dir; - arq->state = AS_RQ_NEW; + RQ_SET_STATE(rq, AS_RQ_NEW); - if (rq_data_dir(arq->request) == READ - || (arq->request->flags & REQ_RW_SYNC)) - arq->is_sync = 1; - else - arq->is_sync = 0; - data_dir = arq->is_sync; + data_dir = rq_is_sync(rq); - arq->io_context = as_get_io_context(); + rq->elevator_private = as_get_io_context(q->node); - if (arq->io_context) { - as_update_iohist(ad, arq->io_context->aic, arq->request); - atomic_inc(&arq->io_context->aic->nr_queued); + if (RQ_IOC(rq)) { + as_update_iohist(ad, RQ_IOC(rq)->aic, rq); + atomic_inc(&RQ_IOC(rq)->aic->nr_queued); } - as_add_arq_rb(ad, arq); - if (rq_mergeable(arq->request)) - as_add_arq_hash(ad, arq); + as_add_rq_rb(ad, rq); /* * set expire time (only used for reads) and add to fifo list */ - arq->expires = jiffies + ad->fifo_expire[data_dir]; - list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]); + list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]); - as_update_arq(ad, arq); /* keep state machine up to date */ - arq->state = AS_RQ_QUEUED; + as_update_rq(ad, rq); /* keep state machine up to date */ + RQ_SET_STATE(rq, AS_RQ_QUEUED); } static void as_activate_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - - WARN_ON(arq->state != AS_RQ_DISPATCHED); - arq->state = AS_RQ_REMOVED; - if (arq->io_context && arq->io_context->aic) - atomic_dec(&arq->io_context->aic->nr_dispatched); + WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED); + RQ_SET_STATE(rq, AS_RQ_REMOVED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched); } static void as_deactivate_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - - WARN_ON(arq->state != AS_RQ_REMOVED); - arq->state = AS_RQ_DISPATCHED; - if (arq->io_context && arq->io_context->aic) - atomic_inc(&arq->io_context->aic->nr_dispatched); + WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED); + RQ_SET_STATE(rq, AS_RQ_DISPATCHED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched); } /* @@ -1396,93 +1197,35 @@ static int as_queue_empty(request_queue_t *q) && list_empty(&ad->fifo_list[REQ_SYNC]); } -static struct request *as_former_request(request_queue_t *q, - struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - struct rb_node *rbprev = rb_prev(&arq->rb_node); - struct request *ret = NULL; - - if (rbprev) - ret = rb_entry_arq(rbprev)->request; - - return ret; -} - -static struct request *as_latter_request(request_queue_t *q, - struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - struct rb_node *rbnext = rb_next(&arq->rb_node); - struct request *ret = NULL; - - if (rbnext) - ret = rb_entry_arq(rbnext)->request; - - return ret; -} - static int as_merge(request_queue_t *q, struct request **req, struct bio *bio) { struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); struct request *__rq; - int ret; - - /* - * see if the merge hash can satisfy a back merge - */ - __rq = as_find_arq_hash(ad, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } - } /* * check for front merge */ - __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); - if (__rq) { - BUG_ON(rb_key != rq_rb_key(__rq)); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; - } + __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key); + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; } return ELEVATOR_NO_MERGE; -out: - if (ret) { - if (rq_mergeable(__rq)) - as_hot_arq_hash(ad, RQ_DATA(__rq)); - } - *req = __rq; - return ret; } -static void as_merged_request(request_queue_t *q, struct request *req) +static void as_merged_request(request_queue_t *q, struct request *req, int type) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(req); - - /* - * hash always needs to be repositioned, key is end sector - */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); /* * if the merge was a front merge, we need to reposition request */ - if (rq_rb_key(req) != arq->rb_key) { - as_del_arq_rb(ad, arq); - as_add_arq_rb(ad, arq); + if (type == ELEVATOR_FRONT_MERGE) { + as_del_rq_rb(ad, req); + as_add_rq_rb(ad, req); /* * Note! At this stage of this and the next function, our next * request may not be optimal - eg the request may have "grown" @@ -1494,38 +1237,22 @@ static void as_merged_request(request_queue_t *q, struct request *req) static void as_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(req); - struct as_rq *anext = RQ_DATA(next); - - BUG_ON(!arq); - BUG_ON(!anext); - /* - * reposition arq (this is the merged request) in hash, and in rbtree - * in case of a front merge + * if next expires before rq, assign its expire time to arq + * and move into next position (next will be deleted) in fifo */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); + if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(rq_fifo_time(next), rq_fifo_time(req))) { + struct io_context *rioc = RQ_IOC(req); + struct io_context *nioc = RQ_IOC(next); - if (rq_rb_key(req) != arq->rb_key) { - as_del_arq_rb(ad, arq); - as_add_arq_rb(ad, arq); - } - - /* - * if anext expires before arq, assign its expire time to arq - * and move into anext position (anext will be deleted) in fifo - */ - if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) { - if (time_before(anext->expires, arq->expires)) { - list_move(&arq->fifo, &anext->fifo); - arq->expires = anext->expires; + list_move(&req->queuelist, &next->queuelist); + rq_set_fifo_time(req, rq_fifo_time(next)); /* * Don't copy here but swap, because when anext is * removed below, it must contain the unused context */ - swap_io_context(&arq->io_context, &anext->io_context); + swap_io_context(&rioc, &nioc); } } @@ -1533,9 +1260,9 @@ static void as_merged_requests(request_queue_t *q, struct request *req, * kill knowledge of next, this one is a goner */ as_remove_queued_request(q, next); - as_put_io_context(anext); + as_put_io_context(next); - anext->state = AS_RQ_MERGED; + RQ_SET_STATE(next, AS_RQ_MERGED); } /* @@ -1553,61 +1280,18 @@ static void as_work_handler(void *data) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - if (!as_queue_empty(q)) - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } -static void as_put_request(request_queue_t *q, struct request *rq) -{ - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); - - if (!arq) { - WARN_ON(1); - return; - } - - if (unlikely(arq->state != AS_RQ_POSTSCHED && - arq->state != AS_RQ_PRESCHED && - arq->state != AS_RQ_MERGED)) { - printk("arq->state %d\n", arq->state); - WARN_ON(1); - } - - mempool_free(arq, ad->arq_pool); - rq->elevator_private = NULL; -} - -static int as_set_request(request_queue_t *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask) -{ - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); - - if (arq) { - memset(arq, 0, sizeof(*arq)); - RB_CLEAR_NODE(&arq->rb_node); - arq->request = rq; - arq->state = AS_RQ_PRESCHED; - arq->io_context = NULL; - INIT_HLIST_NODE(&arq->hash); - INIT_LIST_HEAD(&arq->fifo); - rq->elevator_private = arq; - return 0; - } - - return 1; -} - -static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int as_may_queue(request_queue_t *q, int rw) { int ret = ELV_MQUEUE_MAY; struct as_data *ad = q->elevator->elevator_data; struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - ioc = as_get_io_context(); + ioc = as_get_io_context(q->node); if (ad->io_context == ioc) ret = ELV_MQUEUE_MUST; put_io_context(ioc); @@ -1626,23 +1310,16 @@ static void as_exit_queue(elevator_t *e) BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); - mempool_destroy(ad->arq_pool); put_io_context(ad->io_context); - kfree(ad->hash); kfree(ad); } /* - * initialize elevator private data (as_data), and alloc a arq for - * each request on the free lists + * initialize elevator private data (as_data). */ static void *as_init_queue(request_queue_t *q, elevator_t *e) { struct as_data *ad; - int i; - - if (!arq_pool) - return NULL; ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); if (!ad) @@ -1651,30 +1328,12 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e) ad->q = q; /* Identify what queue the data belongs to */ - ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!ad->hash) { - kfree(ad); - return NULL; - } - - ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, - mempool_free_slab, arq_pool, q->node); - if (!ad->arq_pool) { - kfree(ad->hash); - kfree(ad); - return NULL; - } - /* anticipatory scheduling helpers */ ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; init_timer(&ad->antic_timer); INIT_WORK(&ad->antic_work, as_work_handler, q); - for (i = 0; i < AS_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&ad->hash[i]); - INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); ad->sort_list[REQ_SYNC] = RB_ROOT; @@ -1787,10 +1446,8 @@ static struct elevator_type iosched_as = { .elevator_deactivate_req_fn = as_deactivate_request, .elevator_queue_empty_fn = as_queue_empty, .elevator_completed_req_fn = as_completed_request, - .elevator_former_req_fn = as_former_request, - .elevator_latter_req_fn = as_latter_request, - .elevator_set_req_fn = as_set_request, - .elevator_put_req_fn = as_put_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, .elevator_may_queue_fn = as_may_queue, .elevator_init_fn = as_init_queue, .elevator_exit_fn = as_exit_queue, @@ -1806,11 +1463,6 @@ static int __init as_init(void) { int ret; - arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), - 0, 0, NULL, NULL); - if (!arq_pool) - return -ENOMEM; - ret = elv_register(&iosched_as); if (!ret) { /* @@ -1822,7 +1474,6 @@ static int __init as_init(void) return 0; } - kmem_cache_destroy(arq_pool); return ret; } @@ -1833,10 +1484,9 @@ static void __exit as_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); - if (atomic_read(&ioc_count)) + if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); synchronize_rcu(); - kmem_cache_destroy(arq_pool); } module_init(as_init); diff --git a/block/blktrace.c b/block/blktrace.c index 8ff33441d8a2..135593c8e45b 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 Jens Axboe + * Copyright (C) 2006 Jens Axboe * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK /* * Bio action bits of interest */ -static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) }; +static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) }; /* * More could be added as needed, taking care to increment the decrementer @@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) #define trace_ahead_bit(rw) \ (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) +#define trace_meta_bit(rw) \ + (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3)) /* * The worker for the various blk_add_trace*() types. Fills out a @@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= bio_act[trace_barrier_bit(rw)]; what |= bio_act[trace_sync_bit(rw)]; what |= bio_act[trace_ahead_bit(rw)]; + what |= bio_act[trace_meta_bit(rw)]; pid = tsk->pid; if (unlikely(act_log_check(bt, what, sector, pid))) @@ -473,6 +476,9 @@ static void blk_check_time(unsigned long long *t) *t -= (a + b) / 2; } +/* + * calibrate our inter-CPU timings + */ static void blk_trace_check_cpu_time(void *data) { unsigned long long *t; @@ -490,20 +496,6 @@ static void blk_trace_check_cpu_time(void *data) put_cpu(); } -/* - * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU - * timings - */ -static void blk_trace_calibrate_offsets(void) -{ - unsigned long flags; - - smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1); - local_irq_save(flags); - blk_trace_check_cpu_time(NULL); - local_irq_restore(flags); -} - static void blk_trace_set_ht_offsets(void) { #if defined(CONFIG_SCHED_SMT) @@ -532,7 +524,7 @@ static void blk_trace_set_ht_offsets(void) static __init int blk_trace_init(void) { mutex_init(&blk_tree_mutex); - blk_trace_calibrate_offsets(); + on_each_cpu(blk_trace_check_cpu_time, NULL, 1, 1); blk_trace_set_ht_offsets(); return 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3a3aee08ec5f..99116e2a310a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4,7 +4,7 @@ * Based on ideas from a previously unfinished io * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. * - * Copyright (C) 2003 Jens Axboe + * Copyright (C) 2003 Jens Axboe */ #include #include @@ -17,7 +17,6 @@ * tunables */ static const int cfq_quantum = 4; /* max queue in one round of service */ -static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ @@ -32,8 +31,6 @@ static int cfq_slice_idle = HZ / 125; #define CFQ_KEY_ASYNC (0) -static DEFINE_SPINLOCK(cfq_exit_lock); - /* * for the hash of cfqq inside the cfqd */ @@ -41,37 +38,19 @@ static DEFINE_SPINLOCK(cfq_exit_lock); #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) -/* - * for the hash of crq inside the cfqq - */ -#define CFQ_MHASH_SHIFT 6 -#define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) -#define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) -#define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) - #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) -#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) -#define RQ_DATA(rq) (rq)->elevator_private +#define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private) +#define RQ_CFQQ(rq) ((rq)->elevator_private2) -/* - * rb-tree defines - */ -#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) -#define rq_rb_key(rq) (rq)->sector - -static kmem_cache_t *crq_pool; static kmem_cache_t *cfq_pool; static kmem_cache_t *cfq_ioc_pool; -static atomic_t ioc_count = ATOMIC_INIT(0); +static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; #define CFQ_PRIO_LISTS IOPRIO_BE_NR #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) -#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) #define ASYNC (0) @@ -102,29 +81,14 @@ struct cfq_data { struct list_head idle_rr; unsigned int busy_queues; - /* - * non-ordered list of empty cfqq's - */ - struct list_head empty_list; - /* * cfqq lookup hash */ struct hlist_head *cfq_hash; - /* - * global crq hash for all queues - */ - struct hlist_head *crq_hash; - - mempool_t *crq_pool; - int rq_in_driver; int hw_tag; - /* - * schedule slice state info - */ /* * idle window management */ @@ -141,13 +105,10 @@ struct cfq_data { sector_t last_sector; unsigned long last_end_request; - unsigned int rq_starved; - /* * tunables, see top of file */ unsigned int cfq_quantum; - unsigned int cfq_queued; unsigned int cfq_fifo_expire[2]; unsigned int cfq_back_penalty; unsigned int cfq_back_max; @@ -170,23 +131,24 @@ struct cfq_queue { struct hlist_node cfq_hash; /* hash key */ unsigned int key; - /* on either rr or empty list of cfqd */ + /* member of the rr/busy/cur/idle cfqd list */ struct list_head cfq_list; /* sorted list of pending requests */ struct rb_root sort_list; /* if fifo isn't expired, next request to serve */ - struct cfq_rq *next_crq; + struct request *next_rq; /* requests queued in sort_list */ int queued[2]; /* currently allocated requests */ int allocated[2]; + /* pending metadata requests */ + int meta_pending; /* fifo list of requests in sort_list */ struct list_head fifo; unsigned long slice_start; unsigned long slice_end; unsigned long slice_left; - unsigned long service_last; /* number of requests that are on the dispatch list */ int on_dispatch[2]; @@ -199,18 +161,6 @@ struct cfq_queue { unsigned int flags; }; -struct cfq_rq { - struct rb_node rb_node; - sector_t rb_key; - struct request *request; - struct hlist_node hash; - - struct cfq_queue *cfq_queue; - struct cfq_io_context *io_context; - - unsigned int crq_flags; -}; - enum cfqq_state_flags { CFQ_CFQQ_FLAG_on_rr = 0, CFQ_CFQQ_FLAG_wait_request, @@ -220,6 +170,7 @@ enum cfqq_state_flags { CFQ_CFQQ_FLAG_fifo_expire, CFQ_CFQQ_FLAG_idle_window, CFQ_CFQQ_FLAG_prio_changed, + CFQ_CFQQ_FLAG_queue_new, }; #define CFQ_CFQQ_FNS(name) \ @@ -244,69 +195,13 @@ CFQ_CFQQ_FNS(must_dispatch); CFQ_CFQQ_FNS(fifo_expire); CFQ_CFQQ_FNS(idle_window); CFQ_CFQQ_FNS(prio_changed); +CFQ_CFQQ_FNS(queue_new); #undef CFQ_CFQQ_FNS -enum cfq_rq_state_flags { - CFQ_CRQ_FLAG_is_sync = 0, -}; - -#define CFQ_CRQ_FNS(name) \ -static inline void cfq_mark_crq_##name(struct cfq_rq *crq) \ -{ \ - crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name); \ -} \ -static inline void cfq_clear_crq_##name(struct cfq_rq *crq) \ -{ \ - crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name); \ -} \ -static inline int cfq_crq_##name(const struct cfq_rq *crq) \ -{ \ - return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ -} - -CFQ_CRQ_FNS(is_sync); -#undef CFQ_CRQ_FNS - static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); -static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); +static void cfq_dispatch_insert(request_queue_t *, struct request *); static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask); -/* - * lots of deadline iosched dupes, can be abstracted later... - */ -static inline void cfq_del_crq_hash(struct cfq_rq *crq) -{ - hlist_del_init(&crq->hash); -} - -static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) -{ - const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); - - hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); -} - -static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) -{ - struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; - struct hlist_node *entry, *next; - - hlist_for_each_safe(entry, next, hash_list) { - struct cfq_rq *crq = list_entry_hash(entry); - struct request *__rq = crq->request; - - if (!rq_mergeable(__rq)) { - cfq_del_crq_hash(crq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; -} - /* * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing @@ -333,12 +228,12 @@ static inline pid_t cfq_queue_pid(struct task_struct *task, int rw) } /* - * Lifted from AS - choose which of crq1 and crq2 that is best served now. + * Lifted from AS - choose which of rq1 and rq2 that is best served now. * We choose the request that is closest to the head right now. Distance * behind the head is penalized and only allowed to a certain extent. */ -static struct cfq_rq * -cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) +static struct request * +cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2) { sector_t last, s1, s2, d1 = 0, d2 = 0; unsigned long back_max; @@ -346,18 +241,22 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) #define CFQ_RQ2_WRAP 0x02 /* request 2 wraps */ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ - if (crq1 == NULL || crq1 == crq2) - return crq2; - if (crq2 == NULL) - return crq1; + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; - if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) - return crq1; - else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) - return crq2; + if (rq_is_sync(rq1) && !rq_is_sync(rq2)) + return rq1; + else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) + return rq2; + if (rq_is_meta(rq1) && !rq_is_meta(rq2)) + return rq1; + else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) + return rq2; - s1 = crq1->request->sector; - s2 = crq2->request->sector; + s1 = rq1->sector; + s2 = rq2->sector; last = cfqd->last_sector; @@ -392,23 +291,23 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) * check two variables for all permutations: --> faster! */ switch (wrap) { - case 0: /* common case for CFQ: crq1 and crq2 not wrapped */ + case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ if (d1 < d2) - return crq1; + return rq1; else if (d2 < d1) - return crq2; + return rq2; else { if (s1 >= s2) - return crq1; + return rq1; else - return crq2; + return rq2; } case CFQ_RQ2_WRAP: - return crq1; + return rq1; case CFQ_RQ1_WRAP: - return crq2; - case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both crqs wrapped */ + return rq2; + case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP): /* both rqs wrapped */ default: /* * Since both rqs are wrapped, @@ -417,50 +316,43 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) * since back seek takes more time than forward. */ if (s1 <= s2) - return crq1; + return rq1; else - return crq2; + return rq2; } } /* * would be nice to take fifo expire time into account as well */ -static struct cfq_rq * -cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct cfq_rq *last) +static struct request * +cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct request *last) { - struct cfq_rq *crq_next = NULL, *crq_prev = NULL; - struct rb_node *rbnext, *rbprev; + struct rb_node *rbnext = rb_next(&last->rb_node); + struct rb_node *rbprev = rb_prev(&last->rb_node); + struct request *next = NULL, *prev = NULL; - if (!(rbnext = rb_next(&last->rb_node))) { - rbnext = rb_first(&cfqq->sort_list); - if (rbnext == &last->rb_node) - rbnext = NULL; - } - - rbprev = rb_prev(&last->rb_node); + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); if (rbprev) - crq_prev = rb_entry_crq(rbprev); + prev = rb_entry_rq(rbprev); + if (rbnext) - crq_next = rb_entry_crq(rbnext); + next = rb_entry_rq(rbnext); + else { + rbnext = rb_first(&cfqq->sort_list); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } - return cfq_choose_req(cfqd, crq_next, crq_prev); -} - -static void cfq_update_next_crq(struct cfq_rq *crq) -{ - struct cfq_queue *cfqq = crq->cfq_queue; - - if (cfqq->next_crq == crq) - cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); + return cfq_choose_req(cfqd, next, prev); } static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) { struct cfq_data *cfqd = cfqq->cfqd; - struct list_head *list, *entry; + struct list_head *list; BUG_ON(!cfq_cfqq_on_rr(cfqq)); @@ -485,31 +377,26 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) } /* - * if queue was preempted, just add to front to be fair. busy_rr - * isn't sorted, but insert at the back for fairness. + * If this queue was preempted or is new (never been serviced), let + * it be added first for fairness but beind other new queues. + * Otherwise, just add to the back of the list. */ - if (preempted || list == &cfqd->busy_rr) { - if (preempted) - list = list->prev; + if (preempted || cfq_cfqq_queue_new(cfqq)) { + struct list_head *n = list; + struct cfq_queue *__cfqq; - list_add_tail(&cfqq->cfq_list, list); - return; + while (n->next != list) { + __cfqq = list_entry_cfqq(n->next); + if (!cfq_cfqq_queue_new(__cfqq)) + break; + + n = n->next; + } + + list = n; } - /* - * sort by when queue was last serviced - */ - entry = list; - while ((entry = entry->prev) != list) { - struct cfq_queue *__cfqq = list_entry_cfqq(entry); - - if (!__cfqq->service_last) - break; - if (time_before(__cfqq->service_last, cfqq->service_last)) - break; - } - - list_add(&cfqq->cfq_list, entry); + list_add_tail(&cfqq->cfq_list, list); } /* @@ -531,7 +418,7 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { BUG_ON(!cfq_cfqq_on_rr(cfqq)); cfq_clear_cfqq_on_rr(cfqq); - list_move(&cfqq->cfq_list, &cfqd->empty_list); + list_del_init(&cfqq->cfq_list); BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -540,81 +427,43 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* * rb tree support functions */ -static inline void cfq_del_crq_rb(struct cfq_rq *crq) +static inline void cfq_del_rq_rb(struct request *rq) { - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - const int sync = cfq_crq_is_sync(crq); + const int sync = rq_is_sync(rq); BUG_ON(!cfqq->queued[sync]); cfqq->queued[sync]--; - cfq_update_next_crq(crq); - - rb_erase(&crq->rb_node, &cfqq->sort_list); + elv_rb_del(&cfqq->sort_list, rq); if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); } -static struct cfq_rq * -__cfq_add_crq_rb(struct cfq_rq *crq) +static void cfq_add_rq_rb(struct request *rq) { - struct rb_node **p = &crq->cfq_queue->sort_list.rb_node; - struct rb_node *parent = NULL; - struct cfq_rq *__crq; - - while (*p) { - parent = *p; - __crq = rb_entry_crq(parent); - - if (crq->rb_key < __crq->rb_key) - p = &(*p)->rb_left; - else if (crq->rb_key > __crq->rb_key) - p = &(*p)->rb_right; - else - return __crq; - } - - rb_link_node(&crq->rb_node, parent, p); - return NULL; -} - -static void cfq_add_crq_rb(struct cfq_rq *crq) -{ - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - struct request *rq = crq->request; - struct cfq_rq *__alias; + struct request *__alias; - crq->rb_key = rq_rb_key(rq); - cfqq->queued[cfq_crq_is_sync(crq)]++; + cfqq->queued[rq_is_sync(rq)]++; /* * looks a little odd, but the first insert might return an alias. * if that happens, put the alias on the dispatch list */ - while ((__alias = __cfq_add_crq_rb(crq)) != NULL) + while ((__alias = elv_rb_add(&cfqq->sort_list, rq)) != NULL) cfq_dispatch_insert(cfqd->queue, __alias); - - rb_insert_color(&crq->rb_node, &cfqq->sort_list); - - if (!cfq_cfqq_on_rr(cfqq)) - cfq_add_cfqq_rr(cfqd, cfqq); - - /* - * check if this request is a better next-serve candidate - */ - cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); } static inline void -cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) +cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { - rb_erase(&crq->rb_node, &cfqq->sort_list); - cfqq->queued[cfq_crq_is_sync(crq)]--; - - cfq_add_crq_rb(crq); + elv_rb_del(&cfqq->sort_list, rq); + cfqq->queued[rq_is_sync(rq)]--; + cfq_add_rq_rb(rq); } static struct request * @@ -623,27 +472,14 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio) struct task_struct *tsk = current; pid_t key = cfq_queue_pid(tsk, bio_data_dir(bio)); struct cfq_queue *cfqq; - struct rb_node *n; - sector_t sector; cfqq = cfq_find_cfq_hash(cfqd, key, tsk->ioprio); - if (!cfqq) - goto out; + if (cfqq) { + sector_t sector = bio->bi_sector + bio_sectors(bio); - sector = bio->bi_sector + bio_sectors(bio); - n = cfqq->sort_list.rb_node; - while (n) { - struct cfq_rq *crq = rb_entry_crq(n); - - if (sector < crq->rb_key) - n = n->rb_left; - else if (sector > crq->rb_key) - n = n->rb_right; - else - return crq->request; + return elv_rb_find(&cfqq->sort_list, sector); } -out: return NULL; } @@ -673,11 +509,18 @@ static void cfq_deactivate_request(request_queue_t *q, struct request *rq) static void cfq_remove_request(struct request *rq) { - struct cfq_rq *crq = RQ_DATA(rq); + struct cfq_queue *cfqq = RQ_CFQQ(rq); + + if (cfqq->next_rq == rq) + cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq); list_del_init(&rq->queuelist); - cfq_del_crq_rb(crq); - cfq_del_crq_hash(crq); + cfq_del_rq_rb(rq); + + if (rq_is_meta(rq)) { + WARN_ON(!cfqq->meta_pending); + cfqq->meta_pending--; + } } static int @@ -685,39 +528,23 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; struct request *__rq; - int ret; - - __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); - if (__rq && elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } __rq = cfq_find_rq_fmerge(cfqd, bio); if (__rq && elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; + *req = __rq; + return ELEVATOR_FRONT_MERGE; } return ELEVATOR_NO_MERGE; -out: - *req = __rq; - return ret; } -static void cfq_merged_request(request_queue_t *q, struct request *req) +static void cfq_merged_request(request_queue_t *q, struct request *req, + int type) { - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(req); + if (type == ELEVATOR_FRONT_MERGE) { + struct cfq_queue *cfqq = RQ_CFQQ(req); - cfq_del_crq_hash(crq); - cfq_add_crq_hash(cfqd, crq); - - if (rq_rb_key(req) != crq->rb_key) { - struct cfq_queue *cfqq = crq->cfq_queue; - - cfq_update_next_crq(crq); - cfq_reposition_crq_rb(cfqq, crq); + cfq_reposition_rq_rb(cfqq, req); } } @@ -725,8 +552,6 @@ static void cfq_merged_requests(request_queue_t *q, struct request *rq, struct request *next) { - cfq_merged_request(q, rq); - /* * reposition in fifo if next is older than rq */ @@ -768,13 +593,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_wait_request(cfqq)) del_timer(&cfqd->idle_slice_timer); - if (!preempted && !cfq_cfqq_dispatched(cfqq)) { - cfqq->service_last = now; + if (!preempted && !cfq_cfqq_dispatched(cfqq)) cfq_schedule_dispatch(cfqd); - } cfq_clear_cfqq_must_dispatch(cfqq); cfq_clear_cfqq_wait_request(cfqq); + cfq_clear_cfqq_queue_new(cfqq); /* * store what was left of this slice, if the queue idled out @@ -868,26 +692,25 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) { struct cfq_queue *cfqq = NULL; - /* - * if current list is non-empty, grab first entry. if it is empty, - * get next prio level and grab first entry then if any are spliced - */ - if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) + if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) { + /* + * if current list is non-empty, grab first entry. if it is + * empty, get next prio level and grab first entry then if any + * are spliced + */ cfqq = list_entry_cfqq(cfqd->cur_rr.next); - - /* - * If no new queues are available, check if the busy list has some - * before falling back to idle io. - */ - if (!cfqq && !list_empty(&cfqd->busy_rr)) + } else if (!list_empty(&cfqd->busy_rr)) { + /* + * If no new queues are available, check if the busy list has + * some before falling back to idle io. + */ cfqq = list_entry_cfqq(cfqd->busy_rr.next); - - /* - * if we have idle queues and no rt or be queues had pending - * requests, either allow immediate service if the grace period - * has passed or arm the idle grace timer - */ - if (!cfqq && !list_empty(&cfqd->idle_rr)) { + } else if (!list_empty(&cfqd->idle_rr)) { + /* + * if we have idle queues and no rt or be queues had pending + * requests, either allow immediate service if the grace period + * has passed or arm the idle grace timer + */ unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; if (time_after_eq(jiffies, end)) @@ -942,16 +765,14 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) return 1; } -static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) +static void cfq_dispatch_insert(request_queue_t *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_queue *cfqq = crq->cfq_queue; - struct request *rq; + struct cfq_queue *cfqq = RQ_CFQQ(rq); - cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); - cfq_remove_request(crq->request); - cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; - elv_dispatch_sort(q, crq->request); + cfq_remove_request(rq); + cfqq->on_dispatch[rq_is_sync(rq)]++; + elv_dispatch_sort(q, rq); rq = list_entry(q->queue_head.prev, struct request, queuelist); cfqd->last_sector = rq->sector + rq->nr_sectors; @@ -960,24 +781,23 @@ static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) /* * return expired entry, or NULL to just start from scratch in rbtree */ -static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) +static inline struct request *cfq_check_fifo(struct cfq_queue *cfqq) { struct cfq_data *cfqd = cfqq->cfqd; struct request *rq; - struct cfq_rq *crq; + int fifo; if (cfq_cfqq_fifo_expire(cfqq)) return NULL; + if (list_empty(&cfqq->fifo)) + return NULL; - if (!list_empty(&cfqq->fifo)) { - int fifo = cfq_cfqq_class_sync(cfqq); + fifo = cfq_cfqq_class_sync(cfqq); + rq = rq_entry_fifo(cfqq->fifo.next); - crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); - rq = crq->request; - if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { - cfq_mark_cfqq_fifo_expire(cfqq); - return crq; - } + if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { + cfq_mark_cfqq_fifo_expire(cfqq); + return rq; } return NULL; @@ -1063,25 +883,25 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); do { - struct cfq_rq *crq; + struct request *rq; /* * follow expired path, else get first next available */ - if ((crq = cfq_check_fifo(cfqq)) == NULL) - crq = cfqq->next_crq; + if ((rq = cfq_check_fifo(cfqq)) == NULL) + rq = cfqq->next_rq; /* * finally, insert request into driver dispatch list */ - cfq_dispatch_insert(cfqd->queue, crq); + cfq_dispatch_insert(cfqd->queue, rq); cfqd->dispatch_slice++; dispatched++; if (!cfqd->active_cic) { - atomic_inc(&crq->io_context->ioc->refcount); - cfqd->active_cic = crq->io_context; + atomic_inc(&RQ_CIC(rq)->ioc->refcount); + cfqd->active_cic = RQ_CIC(rq); } if (RB_EMPTY_ROOT(&cfqq->sort_list)) @@ -1112,13 +932,12 @@ static int cfq_forced_dispatch_cfqqs(struct list_head *list) { struct cfq_queue *cfqq, *next; - struct cfq_rq *crq; int dispatched; dispatched = 0; list_for_each_entry_safe(cfqq, next, list, cfq_list) { - while ((crq = cfqq->next_crq)) { - cfq_dispatch_insert(cfqq->cfqd->queue, crq); + while (cfqq->next_rq) { + cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq); dispatched++; } BUG_ON(!list_empty(&cfqq->fifo)); @@ -1194,8 +1013,8 @@ cfq_dispatch_requests(request_queue_t *q, int force) } /* - * task holds one reference to the queue, dropped when task exits. each crq - * in-flight on this queue also holds a reference, dropped when crq is freed. + * task holds one reference to the queue, dropped when task exits. each rq + * in-flight on this queue also holds a reference, dropped when rq is freed. * * queue lock must be held here. */ @@ -1223,7 +1042,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) kmem_cache_free(cfq_pool, cfqq); } -static inline struct cfq_queue * +static struct cfq_queue * __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned int key, unsigned int prio, const int hashval) { @@ -1260,62 +1079,63 @@ static void cfq_free_io_context(struct io_context *ioc) freed++; } - if (atomic_sub_and_test(freed, &ioc_count) && ioc_gone) + elv_ioc_count_mod(ioc_count, -freed); + + if (ioc_gone && !elv_ioc_count_read(ioc_count)) complete(ioc_gone); } -static void cfq_trim(struct io_context *ioc) +static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - ioc->set_ioprio = NULL; - cfq_free_io_context(ioc); + if (unlikely(cfqq == cfqd->active_queue)) + __cfq_slice_expired(cfqd, cfqq, 0); + + cfq_put_queue(cfqq); } +static void __cfq_exit_single_io_context(struct cfq_data *cfqd, + struct cfq_io_context *cic) +{ + list_del_init(&cic->queue_list); + smp_wmb(); + cic->key = NULL; + + if (cic->cfqq[ASYNC]) { + cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); + cic->cfqq[ASYNC] = NULL; + } + + if (cic->cfqq[SYNC]) { + cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]); + cic->cfqq[SYNC] = NULL; + } +} + + /* * Called with interrupts disabled */ static void cfq_exit_single_io_context(struct cfq_io_context *cic) { struct cfq_data *cfqd = cic->key; - request_queue_t *q; - if (!cfqd) - return; + if (cfqd) { + request_queue_t *q = cfqd->queue; - q = cfqd->queue; - - WARN_ON(!irqs_disabled()); - - spin_lock(q->queue_lock); - - if (cic->cfqq[ASYNC]) { - if (unlikely(cic->cfqq[ASYNC] == cfqd->active_queue)) - __cfq_slice_expired(cfqd, cic->cfqq[ASYNC], 0); - cfq_put_queue(cic->cfqq[ASYNC]); - cic->cfqq[ASYNC] = NULL; + spin_lock_irq(q->queue_lock); + __cfq_exit_single_io_context(cfqd, cic); + spin_unlock_irq(q->queue_lock); } - - if (cic->cfqq[SYNC]) { - if (unlikely(cic->cfqq[SYNC] == cfqd->active_queue)) - __cfq_slice_expired(cfqd, cic->cfqq[SYNC], 0); - cfq_put_queue(cic->cfqq[SYNC]); - cic->cfqq[SYNC] = NULL; - } - - cic->key = NULL; - list_del_init(&cic->queue_list); - spin_unlock(q->queue_lock); } static void cfq_exit_io_context(struct io_context *ioc) { struct cfq_io_context *__cic; - unsigned long flags; struct rb_node *n; /* * put the reference this task is holding to the various queues */ - spin_lock_irqsave(&cfq_exit_lock, flags); n = rb_first(&ioc->cic_root); while (n != NULL) { @@ -1324,22 +1144,21 @@ static void cfq_exit_io_context(struct io_context *ioc) cfq_exit_single_io_context(__cic); n = rb_next(n); } - - spin_unlock_irqrestore(&cfq_exit_lock, flags); } static struct cfq_io_context * cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { - struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); + struct cfq_io_context *cic; + cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node); if (cic) { memset(cic, 0, sizeof(*cic)); cic->last_end_request = jiffies; INIT_LIST_HEAD(&cic->queue_list); cic->dtor = cfq_free_io_context; cic->exit = cfq_exit_io_context; - atomic_inc(&ioc_count); + elv_ioc_count_inc(ioc_count); } return cic; @@ -1420,15 +1239,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic) spin_unlock(cfqd->queue->queue_lock); } -/* - * callback from sys_ioprio_set, irqs are disabled - */ -static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) +static void cfq_ioc_set_ioprio(struct io_context *ioc) { struct cfq_io_context *cic; struct rb_node *n; - spin_lock(&cfq_exit_lock); + ioc->ioprio_changed = 0; n = rb_first(&ioc->cic_root); while (n != NULL) { @@ -1437,10 +1253,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) changed_ioprio(cic); n = rb_next(n); } - - spin_unlock(&cfq_exit_lock); - - return 0; } static struct cfq_queue * @@ -1460,12 +1272,18 @@ retry: cfqq = new_cfqq; new_cfqq = NULL; } else if (gfp_mask & __GFP_WAIT) { + /* + * Inform the allocator of the fact that we will + * just repeat this allocation if it fails, to allow + * the allocator to do whatever it needs to attempt to + * free memory. + */ spin_unlock_irq(cfqd->queue->queue_lock); - new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node); spin_lock_irq(cfqd->queue->queue_lock); goto retry; } else { - cfqq = kmem_cache_alloc(cfq_pool, gfp_mask); + cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node); if (!cfqq) goto out; } @@ -1480,13 +1298,13 @@ retry: hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); atomic_set(&cfqq->ref, 0); cfqq->cfqd = cfqd; - cfqq->service_last = 0; /* * set ->slice_left to allow preemption for a new process */ cfqq->slice_left = 2 * cfqd->cfq_slice_idle; cfq_mark_cfqq_idle_window(cfqq); cfq_mark_cfqq_prio_changed(cfqq); + cfq_mark_cfqq_queue_new(cfqq); cfq_init_prio_data(cfqq); } @@ -1502,12 +1320,10 @@ out: static void cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) { - spin_lock(&cfq_exit_lock); + WARN_ON(!list_empty(&cic->queue_list)); rb_erase(&cic->rb_node, &ioc->cic_root); - list_del_init(&cic->queue_list); - spin_unlock(&cfq_exit_lock); kmem_cache_free(cfq_ioc_pool, cic); - atomic_dec(&ioc_count); + elv_ioc_count_dec(ioc_count); } static struct cfq_io_context * @@ -1551,7 +1367,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, cic->ioc = ioc; cic->key = cfqd; - ioc->set_ioprio = cfq_ioc_set_ioprio; restart: parent = NULL; p = &ioc->cic_root.rb_node; @@ -1573,11 +1388,12 @@ restart: BUG(); } - spin_lock(&cfq_exit_lock); rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); + + spin_lock_irq(cfqd->queue->queue_lock); list_add(&cic->queue_list, &cfqd->cic_list); - spin_unlock(&cfq_exit_lock); + spin_unlock_irq(cfqd->queue->queue_lock); } /* @@ -1593,7 +1409,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) might_sleep_if(gfp_mask & __GFP_WAIT); - ioc = get_io_context(gfp_mask); + ioc = get_io_context(gfp_mask, cfqd->queue->node); if (!ioc) return NULL; @@ -1607,6 +1423,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) cfq_cic_link(cfqd, ioc, cic); out: + smp_read_barrier_depends(); + if (unlikely(ioc->ioprio_changed)) + cfq_ioc_set_ioprio(ioc); + return cic; err: put_io_context(ioc); @@ -1640,15 +1460,15 @@ cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_io_context *cic) static void cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, - struct cfq_rq *crq) + struct request *rq) { sector_t sdist; u64 total; - if (cic->last_request_pos < crq->request->sector) - sdist = crq->request->sector - cic->last_request_pos; + if (cic->last_request_pos < rq->sector) + sdist = rq->sector - cic->last_request_pos; else - sdist = cic->last_request_pos - crq->request->sector; + sdist = cic->last_request_pos - rq->sector; /* * Don't allow the seek distance to get too large from the @@ -1699,7 +1519,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, */ static int cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, - struct cfq_rq *crq) + struct request *rq) { struct cfq_queue *cfqq = cfqd->active_queue; @@ -1718,7 +1538,17 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, */ if (new_cfqq->slice_left < cfqd->cfq_slice_idle) return 0; - if (cfq_crq_is_sync(crq) && !cfq_cfqq_sync(cfqq)) + /* + * if the new request is sync, but the currently running queue is + * not, let the sync request have priority. + */ + if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq)) + return 1; + /* + * So both queues are sync. Let the new request get disk time if + * it's a metadata request and the current queue is doing regular IO. + */ + if (rq_is_meta(rq) && !cfqq->meta_pending) return 1; return 0; @@ -1730,47 +1560,45 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, */ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct cfq_queue *__cfqq, *next; - - list_for_each_entry_safe(__cfqq, next, &cfqd->cur_rr, cfq_list) - cfq_resort_rr_list(__cfqq, 1); + cfq_slice_expired(cfqd, 1); if (!cfqq->slice_left) cfqq->slice_left = cfq_prio_to_slice(cfqd, cfqq) / 2; + /* + * Put the new queue at the front of the of the current list, + * so we know that it will be selected next. + */ + BUG_ON(!cfq_cfqq_on_rr(cfqq)); + list_move(&cfqq->cfq_list, &cfqd->cur_rr); + cfqq->slice_end = cfqq->slice_left + jiffies; - cfq_slice_expired(cfqd, 1); - __cfq_set_active_queue(cfqd, cfqq); } /* - * should really be a ll_rw_blk.c helper - */ -static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq) -{ - request_queue_t *q = cfqd->queue; - - if (!blk_queue_plugged(q)) - q->request_fn(q); - else - __generic_unplug_device(q); -} - -/* - * Called when a new fs request (crq) is added (to cfqq). Check if there's + * Called when a new fs request (rq) is added (to cfqq). Check if there's * something we should do about it */ static void -cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct cfq_rq *crq) +cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, + struct request *rq) { - struct cfq_io_context *cic = crq->io_context; + struct cfq_io_context *cic = RQ_CIC(rq); + + if (rq_is_meta(rq)) + cfqq->meta_pending++; + + /* + * check if this request is a better next-serve candidate)) { + */ + cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); + BUG_ON(!cfqq->next_rq); /* * we never wait for an async request and we don't allow preemption * of an async request. so just return early */ - if (!cfq_crq_is_sync(crq)) { + if (!rq_is_sync(rq)) { /* * sync process issued an async request, if it's waiting * then expire it and kick rq handling. @@ -1778,17 +1606,17 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cic == cfqd->active_cic && del_timer(&cfqd->idle_slice_timer)) { cfq_slice_expired(cfqd, 0); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } return; } cfq_update_io_thinktime(cfqd, cic); - cfq_update_io_seektime(cfqd, cic, crq); + cfq_update_io_seektime(cfqd, cic, rq); cfq_update_idle_window(cfqd, cfqq, cic); cic->last_queue = jiffies; - cic->last_request_pos = crq->request->sector + crq->request->nr_sectors; + cic->last_request_pos = rq->sector + rq->nr_sectors; if (cfqq == cfqd->active_queue) { /* @@ -1799,9 +1627,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_wait_request(cfqq)) { cfq_mark_cfqq_must_dispatch(cfqq); del_timer(&cfqd->idle_slice_timer); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } - } else if (cfq_should_preempt(cfqd, cfqq, crq)) { + } else if (cfq_should_preempt(cfqd, cfqq, rq)) { /* * not the active queue - expire current slice if it is * idle and has expired it's mean thinktime or this new queue @@ -1809,34 +1637,32 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, */ cfq_preempt_queue(cfqd, cfqq); cfq_mark_cfqq_must_dispatch(cfqq); - cfq_start_queueing(cfqd, cfqq); + blk_start_queueing(cfqd->queue); } } static void cfq_insert_request(request_queue_t *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(rq); - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); cfq_init_prio_data(cfqq); - cfq_add_crq_rb(crq); + cfq_add_rq_rb(rq); + + if (!cfq_cfqq_on_rr(cfqq)) + cfq_add_cfqq_rr(cfqd, cfqq); list_add_tail(&rq->queuelist, &cfqq->fifo); - if (rq_mergeable(rq)) - cfq_add_crq_hash(cfqd, crq); - - cfq_crq_enqueued(cfqd, cfqq, crq); + cfq_rq_enqueued(cfqd, cfqq, rq); } static void cfq_completed_request(request_queue_t *q, struct request *rq) { - struct cfq_rq *crq = RQ_DATA(rq); - struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - const int sync = cfq_crq_is_sync(crq); + const int sync = rq_is_sync(rq); unsigned long now; now = jiffies; @@ -1849,15 +1675,11 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq) if (!cfq_class_idle(cfqq)) cfqd->last_end_request = now; - if (!cfq_cfqq_dispatched(cfqq)) { - if (cfq_cfqq_on_rr(cfqq)) { - cfqq->service_last = now; - cfq_resort_rr_list(cfqq, 0); - } - } + if (!cfq_cfqq_dispatched(cfqq) && cfq_cfqq_on_rr(cfqq)) + cfq_resort_rr_list(cfqq, 0); if (sync) - crq->io_context->last_end_request = now; + RQ_CIC(rq)->last_end_request = now; /* * If this is the active queue, check if it needs to be expired, @@ -1873,30 +1695,6 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq) } } -static struct request * -cfq_former_request(request_queue_t *q, struct request *rq) -{ - struct cfq_rq *crq = RQ_DATA(rq); - struct rb_node *rbprev = rb_prev(&crq->rb_node); - - if (rbprev) - return rb_entry_crq(rbprev)->request; - - return NULL; -} - -static struct request * -cfq_latter_request(request_queue_t *q, struct request *rq) -{ - struct cfq_rq *crq = RQ_DATA(rq); - struct rb_node *rbnext = rb_next(&crq->rb_node); - - if (rbnext) - return rb_entry_crq(rbnext)->request; - - return NULL; -} - /* * we temporarily boost lower priority queues if they are holding fs exclusive * resources. they are boosted to normal prio (CLASS_BE/4) @@ -1933,9 +1731,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq) cfq_resort_rr_list(cfqq, 0); } -static inline int -__cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, - struct task_struct *task, int rw) +static inline int __cfq_may_queue(struct cfq_queue *cfqq) { if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && !cfq_cfqq_must_alloc_slice(cfqq)) { @@ -1946,7 +1742,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq, return ELV_MQUEUE_MAY; } -static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int cfq_may_queue(request_queue_t *q, int rw) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -1963,48 +1759,30 @@ static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio) cfq_init_prio_data(cfqq); cfq_prio_boost(cfqq); - return __cfq_may_queue(cfqd, cfqq, tsk, rw); + return __cfq_may_queue(cfqq); } return ELV_MQUEUE_MAY; } -static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq) -{ - struct cfq_data *cfqd = q->elevator->elevator_data; - - if (unlikely(cfqd->rq_starved)) { - struct request_list *rl = &q->rq; - - smp_mb(); - if (waitqueue_active(&rl->wait[READ])) - wake_up(&rl->wait[READ]); - if (waitqueue_active(&rl->wait[WRITE])) - wake_up(&rl->wait[WRITE]); - } -} - /* * queue lock held here */ static void cfq_put_request(request_queue_t *q, struct request *rq) { - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(rq); + struct cfq_queue *cfqq = RQ_CFQQ(rq); - if (crq) { - struct cfq_queue *cfqq = crq->cfq_queue; + if (cfqq) { const int rw = rq_data_dir(rq); BUG_ON(!cfqq->allocated[rw]); cfqq->allocated[rw]--; - put_io_context(crq->io_context->ioc); + put_io_context(RQ_CIC(rq)->ioc); - mempool_free(crq, cfqd->crq_pool); rq->elevator_private = NULL; + rq->elevator_private2 = NULL; - cfq_check_waiters(q, cfqq); cfq_put_queue(cfqq); } } @@ -2013,8 +1791,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq) * Allocate cfq data structures associated with this request. */ static int -cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) +cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -2022,7 +1799,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, const int rw = rq_data_dir(rq); pid_t key = cfq_queue_pid(tsk, rw); struct cfq_queue *cfqq; - struct cfq_rq *crq; unsigned long flags; int is_sync = key != CFQ_KEY_ASYNC; @@ -2046,42 +1822,18 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, cfqq->allocated[rw]++; cfq_clear_cfqq_must_alloc(cfqq); - cfqd->rq_starved = 0; atomic_inc(&cfqq->ref); + spin_unlock_irqrestore(q->queue_lock, flags); - crq = mempool_alloc(cfqd->crq_pool, gfp_mask); - if (crq) { - RB_CLEAR_NODE(&crq->rb_node); - crq->rb_key = 0; - crq->request = rq; - INIT_HLIST_NODE(&crq->hash); - crq->cfq_queue = cfqq; - crq->io_context = cic; + rq->elevator_private = cic; + rq->elevator_private2 = cfqq; + return 0; - if (is_sync) - cfq_mark_crq_is_sync(crq); - else - cfq_clear_crq_is_sync(crq); - - rq->elevator_private = crq; - return 0; - } - - spin_lock_irqsave(q->queue_lock, flags); - cfqq->allocated[rw]--; - if (!(cfqq->allocated[0] + cfqq->allocated[1])) - cfq_mark_cfqq_must_alloc(cfqq); - cfq_put_queue(cfqq); queue_fail: if (cic) put_io_context(cic->ioc); - /* - * mark us rq allocation starved. we need to kickstart the process - * ourselves if there are no pending requests that can do it for us. - * that would be an extremely rare OOM situation - */ - cfqd->rq_starved = 1; + cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); return 1; @@ -2090,27 +1842,10 @@ queue_fail: static void cfq_kick_queue(void *data) { request_queue_t *q = data; - struct cfq_data *cfqd = q->elevator->elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - - if (cfqd->rq_starved) { - struct request_list *rl = &q->rq; - - /* - * we aren't guaranteed to get a request after this, but we - * have to be opportunistic - */ - smp_mb(); - if (waitqueue_active(&rl->wait[READ])) - wake_up(&rl->wait[READ]); - if (waitqueue_active(&rl->wait[WRITE])) - wake_up(&rl->wait[WRITE]); - } - - blk_remove_plug(q); - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -2193,7 +1928,6 @@ static void cfq_exit_queue(elevator_t *e) cfq_shutdown_timer_wq(cfqd); - spin_lock(&cfq_exit_lock); spin_lock_irq(q->queue_lock); if (cfqd->active_queue) @@ -2203,25 +1937,14 @@ static void cfq_exit_queue(elevator_t *e) struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, struct cfq_io_context, queue_list); - if (cic->cfqq[ASYNC]) { - cfq_put_queue(cic->cfqq[ASYNC]); - cic->cfqq[ASYNC] = NULL; - } - if (cic->cfqq[SYNC]) { - cfq_put_queue(cic->cfqq[SYNC]); - cic->cfqq[SYNC] = NULL; - } - cic->key = NULL; - list_del_init(&cic->queue_list); + + __cfq_exit_single_io_context(cfqd, cic); } spin_unlock_irq(q->queue_lock); - spin_unlock(&cfq_exit_lock); cfq_shutdown_timer_wq(cfqd); - mempool_destroy(cfqd->crq_pool); - kfree(cfqd->crq_hash); kfree(cfqd->cfq_hash); kfree(cfqd); } @@ -2231,7 +1954,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) struct cfq_data *cfqd; int i; - cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); + cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node); if (!cfqd) return NULL; @@ -2243,23 +1966,12 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_LIST_HEAD(&cfqd->busy_rr); INIT_LIST_HEAD(&cfqd->cur_rr); INIT_LIST_HEAD(&cfqd->idle_rr); - INIT_LIST_HEAD(&cfqd->empty_list); INIT_LIST_HEAD(&cfqd->cic_list); - cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); - if (!cfqd->crq_hash) - goto out_crqhash; - - cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); + cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node); if (!cfqd->cfq_hash) - goto out_cfqhash; + goto out_free; - cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool); - if (!cfqd->crq_pool) - goto out_crqpool; - - for (i = 0; i < CFQ_MHASH_ENTRIES; i++) - INIT_HLIST_HEAD(&cfqd->crq_hash[i]); for (i = 0; i < CFQ_QHASH_ENTRIES; i++) INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); @@ -2275,7 +1987,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); - cfqd->cfq_queued = cfq_queued; cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1]; @@ -2287,19 +1998,13 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) cfqd->cfq_slice_idle = cfq_slice_idle; return cfqd; -out_crqpool: - kfree(cfqd->cfq_hash); -out_cfqhash: - kfree(cfqd->crq_hash); -out_crqhash: +out_free: kfree(cfqd); return NULL; } static void cfq_slab_kill(void) { - if (crq_pool) - kmem_cache_destroy(crq_pool); if (cfq_pool) kmem_cache_destroy(cfq_pool); if (cfq_ioc_pool) @@ -2308,11 +2013,6 @@ static void cfq_slab_kill(void) static int __init cfq_slab_setup(void) { - crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0, - NULL, NULL); - if (!crq_pool) - goto fail; - cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0, NULL, NULL); if (!cfq_pool) @@ -2358,7 +2058,6 @@ static ssize_t __FUNC(elevator_t *e, char *page) \ return cfq_var_show(__data, (page)); \ } SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0); -SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0); SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1); SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); @@ -2386,7 +2085,6 @@ static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \ return ret; \ } STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0); -STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); @@ -2402,7 +2100,6 @@ STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, UINT_MAX, static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(quantum), - CFQ_ATTR(queued), CFQ_ATTR(fifo_expire_sync), CFQ_ATTR(fifo_expire_async), CFQ_ATTR(back_seek_max), @@ -2425,14 +2122,14 @@ static struct elevator_type iosched_cfq = { .elevator_deactivate_req_fn = cfq_deactivate_request, .elevator_queue_empty_fn = cfq_queue_empty, .elevator_completed_req_fn = cfq_completed_request, - .elevator_former_req_fn = cfq_former_request, - .elevator_latter_req_fn = cfq_latter_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, .elevator_set_req_fn = cfq_set_request, .elevator_put_req_fn = cfq_put_request, .elevator_may_queue_fn = cfq_may_queue, .elevator_init_fn = cfq_init_queue, .elevator_exit_fn = cfq_exit_queue, - .trim = cfq_trim, + .trim = cfq_free_io_context, }, .elevator_attrs = cfq_attrs, .elevator_name = "cfq", @@ -2468,7 +2165,7 @@ static void __exit cfq_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); - if (atomic_read(&ioc_count)) + if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); synchronize_rcu(); cfq_slab_kill(); diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index c7ca9f0b6498..b7c5b34cb7b4 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -1,7 +1,7 @@ /* * Deadline i/o scheduler. * - * Copyright (C) 2002 Jens Axboe + * Copyright (C) 2002 Jens Axboe */ #include #include @@ -12,7 +12,6 @@ #include #include #include -#include #include /* @@ -24,13 +23,6 @@ static const int writes_starved = 2; /* max times reads can starve a write */ static const int fifo_batch = 16; /* # of sequential requests treated as one by the above parameters. For throughput. */ -static const int deadline_hash_shift = 5; -#define DL_HASH_BLOCK(sec) ((sec) >> 3) -#define DL_HASH_FN(sec) (hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift)) -#define DL_HASH_ENTRIES (1 << deadline_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define ON_HASH(drq) (!hlist_unhashed(&(drq)->hash)) - struct deadline_data { /* * run time data @@ -45,8 +37,7 @@ struct deadline_data { /* * next in sort order. read, write or both are NULL */ - struct deadline_rq *next_drq[2]; - struct hlist_head *hash; /* request hash */ + struct request *next_rq[2]; unsigned int batching; /* number of sequential requests made */ sector_t last_sector; /* head position */ unsigned int starved; /* times reads have starved writes */ @@ -58,240 +49,69 @@ struct deadline_data { int fifo_batch; int writes_starved; int front_merges; - - mempool_t *drq_pool; }; -/* - * pre-request data. - */ -struct deadline_rq { - /* - * rbtree index, key is the starting offset - */ - struct rb_node rb_node; - sector_t rb_key; +static void deadline_move_request(struct deadline_data *, struct request *); - struct request *request; - - /* - * request hash, key is the ending offset (for back merge lookup) - */ - struct hlist_node hash; - - /* - * expire fifo - */ - struct list_head fifo; - unsigned long expires; -}; - -static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq); - -static kmem_cache_t *drq_pool; - -#define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private) - -/* - * the back merge hash support functions - */ -static inline void __deadline_del_drq_hash(struct deadline_rq *drq) -{ - hlist_del_init(&drq->hash); -} - -static inline void deadline_del_drq_hash(struct deadline_rq *drq) -{ - if (ON_HASH(drq)) - __deadline_del_drq_hash(drq); -} - -static inline void -deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct request *rq = drq->request; - - BUG_ON(ON_HASH(drq)); - - hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]); -} - -/* - * move hot entry to front of chain - */ -static inline void -deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct request *rq = drq->request; - struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))]; - - if (ON_HASH(drq) && &drq->hash != head->first) { - hlist_del(&drq->hash); - hlist_add_head(&drq->hash, head); - } -} - -static struct request * -deadline_find_drq_hash(struct deadline_data *dd, sector_t offset) -{ - struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)]; - struct hlist_node *entry, *next; - struct deadline_rq *drq; - - hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) { - struct request *__rq = drq->request; - - BUG_ON(!ON_HASH(drq)); - - if (!rq_mergeable(__rq)) { - __deadline_del_drq_hash(drq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; -} - -/* - * rb tree support functions - */ -#define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node) -#define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)]) -#define rq_rb_key(rq) (rq)->sector - -static struct deadline_rq * -__deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) -{ - struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node; - struct rb_node *parent = NULL; - struct deadline_rq *__drq; - - while (*p) { - parent = *p; - __drq = rb_entry_drq(parent); - - if (drq->rb_key < __drq->rb_key) - p = &(*p)->rb_left; - else if (drq->rb_key > __drq->rb_key) - p = &(*p)->rb_right; - else - return __drq; - } - - rb_link_node(&drq->rb_node, parent, p); - return NULL; -} +#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))]) static void -deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) +deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) { - struct deadline_rq *__alias; - - drq->rb_key = rq_rb_key(drq->request); + struct rb_root *root = RQ_RB_ROOT(dd, rq); + struct request *__alias; retry: - __alias = __deadline_add_drq_rb(dd, drq); - if (!__alias) { - rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); - return; + __alias = elv_rb_add(root, rq); + if (unlikely(__alias)) { + deadline_move_request(dd, __alias); + goto retry; } - - deadline_move_request(dd, __alias); - goto retry; } static inline void -deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) +deadline_del_rq_rb(struct deadline_data *dd, struct request *rq) { - const int data_dir = rq_data_dir(drq->request); + const int data_dir = rq_data_dir(rq); - if (dd->next_drq[data_dir] == drq) { - struct rb_node *rbnext = rb_next(&drq->rb_node); + if (dd->next_rq[data_dir] == rq) { + struct rb_node *rbnext = rb_next(&rq->rb_node); - dd->next_drq[data_dir] = NULL; + dd->next_rq[data_dir] = NULL; if (rbnext) - dd->next_drq[data_dir] = rb_entry_drq(rbnext); + dd->next_rq[data_dir] = rb_entry_rq(rbnext); } - BUG_ON(!RB_EMPTY_NODE(&drq->rb_node)); - rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); - RB_CLEAR_NODE(&drq->rb_node); -} - -static struct request * -deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir) -{ - struct rb_node *n = dd->sort_list[data_dir].rb_node; - struct deadline_rq *drq; - - while (n) { - drq = rb_entry_drq(n); - - if (sector < drq->rb_key) - n = n->rb_left; - else if (sector > drq->rb_key) - n = n->rb_right; - else - return drq->request; - } - - return NULL; + elv_rb_del(RQ_RB_ROOT(dd, rq), rq); } /* - * deadline_find_first_drq finds the first (lowest sector numbered) request - * for the specified data_dir. Used to sweep back to the start of the disk - * (1-way elevator) after we process the last (highest sector) request. - */ -static struct deadline_rq * -deadline_find_first_drq(struct deadline_data *dd, int data_dir) -{ - struct rb_node *n = dd->sort_list[data_dir].rb_node; - - for (;;) { - if (n->rb_left == NULL) - return rb_entry_drq(n); - - n = n->rb_left; - } -} - -/* - * add drq to rbtree and fifo + * add rq to rbtree and fifo */ static void deadline_add_request(struct request_queue *q, struct request *rq) { struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(rq); + const int data_dir = rq_data_dir(rq); - const int data_dir = rq_data_dir(drq->request); + deadline_add_rq_rb(dd, rq); - deadline_add_drq_rb(dd, drq); /* * set expire time (only used for reads) and add to fifo list */ - drq->expires = jiffies + dd->fifo_expire[data_dir]; - list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); - - if (rq_mergeable(rq)) - deadline_add_drq_hash(dd, drq); + rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); + list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); } /* - * remove rq from rbtree, fifo, and hash + * remove rq from rbtree and fifo. */ static void deadline_remove_request(request_queue_t *q, struct request *rq) { - struct deadline_rq *drq = RQ_DATA(rq); struct deadline_data *dd = q->elevator->elevator_data; - list_del_init(&drq->fifo); - deadline_del_drq_rb(dd, drq); - deadline_del_drq_hash(drq); + rq_fifo_clear(rq); + deadline_del_rq_rb(dd, rq); } static int @@ -301,28 +121,15 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) struct request *__rq; int ret; - /* - * see if the merge hash can satisfy a back merge - */ - __rq = deadline_find_drq_hash(dd, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } - } - /* * check for front merge */ if (dd->front_merges) { - sector_t rb_key = bio->bi_sector + bio_sectors(bio); + sector_t sector = bio->bi_sector + bio_sectors(bio); - __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio)); + __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); if (__rq) { - BUG_ON(rb_key != rq_rb_key(__rq)); + BUG_ON(sector != __rq->sector); if (elv_rq_merge_ok(__rq, bio)) { ret = ELEVATOR_FRONT_MERGE; @@ -333,29 +140,21 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; out: - if (ret) - deadline_hot_drq_hash(dd, RQ_DATA(__rq)); *req = __rq; return ret; } -static void deadline_merged_request(request_queue_t *q, struct request *req) +static void deadline_merged_request(request_queue_t *q, struct request *req, + int type) { struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(req); - - /* - * hash always needs to be repositioned, key is end sector - */ - deadline_del_drq_hash(drq); - deadline_add_drq_hash(dd, drq); /* * if the merge was a front merge, we need to reposition request */ - if (rq_rb_key(req) != drq->rb_key) { - deadline_del_drq_rb(dd, drq); - deadline_add_drq_rb(dd, drq); + if (type == ELEVATOR_FRONT_MERGE) { + elv_rb_del(RQ_RB_ROOT(dd, req), req); + deadline_add_rq_rb(dd, req); } } @@ -363,33 +162,14 @@ static void deadline_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(req); - struct deadline_rq *dnext = RQ_DATA(next); - - BUG_ON(!drq); - BUG_ON(!dnext); - /* - * reposition drq (this is the merged request) in hash, and in rbtree - * in case of a front merge + * if next expires before rq, assign its expire time to rq + * and move into next position (next will be deleted) in fifo */ - deadline_del_drq_hash(drq); - deadline_add_drq_hash(dd, drq); - - if (rq_rb_key(req) != drq->rb_key) { - deadline_del_drq_rb(dd, drq); - deadline_add_drq_rb(dd, drq); - } - - /* - * if dnext expires before drq, assign its expire time to drq - * and move into dnext position (dnext will be deleted) in fifo - */ - if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) { - if (time_before(dnext->expires, drq->expires)) { - list_move(&drq->fifo, &dnext->fifo); - drq->expires = dnext->expires; + if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(rq_fifo_time(next), rq_fifo_time(req))) { + list_move(&req->queuelist, &next->queuelist); + rq_set_fifo_time(req, rq_fifo_time(next)); } } @@ -403,52 +183,50 @@ deadline_merged_requests(request_queue_t *q, struct request *req, * move request from sort list to dispatch queue. */ static inline void -deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) +deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq) { - request_queue_t *q = drq->request->q; + request_queue_t *q = rq->q; - deadline_remove_request(q, drq->request); - elv_dispatch_add_tail(q, drq->request); + deadline_remove_request(q, rq); + elv_dispatch_add_tail(q, rq); } /* * move an entry to dispatch queue */ static void -deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq) +deadline_move_request(struct deadline_data *dd, struct request *rq) { - const int data_dir = rq_data_dir(drq->request); - struct rb_node *rbnext = rb_next(&drq->rb_node); + const int data_dir = rq_data_dir(rq); + struct rb_node *rbnext = rb_next(&rq->rb_node); - dd->next_drq[READ] = NULL; - dd->next_drq[WRITE] = NULL; + dd->next_rq[READ] = NULL; + dd->next_rq[WRITE] = NULL; if (rbnext) - dd->next_drq[data_dir] = rb_entry_drq(rbnext); + dd->next_rq[data_dir] = rb_entry_rq(rbnext); - dd->last_sector = drq->request->sector + drq->request->nr_sectors; + dd->last_sector = rq->sector + rq->nr_sectors; /* * take it off the sort and fifo list, move * to dispatch queue */ - deadline_move_to_dispatch(dd, drq); + deadline_move_to_dispatch(dd, rq); } -#define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo) - /* * deadline_check_fifo returns 0 if there are no expired reads on the fifo, * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) */ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) { - struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next); + struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next); /* - * drq is expired! + * rq is expired! */ - if (time_after(jiffies, drq->expires)) + if (time_after(jiffies, rq_fifo_time(rq))) return 1; return 0; @@ -463,21 +241,21 @@ static int deadline_dispatch_requests(request_queue_t *q, int force) struct deadline_data *dd = q->elevator->elevator_data; const int reads = !list_empty(&dd->fifo_list[READ]); const int writes = !list_empty(&dd->fifo_list[WRITE]); - struct deadline_rq *drq; + struct request *rq; int data_dir; /* * batches are currently reads XOR writes */ - if (dd->next_drq[WRITE]) - drq = dd->next_drq[WRITE]; + if (dd->next_rq[WRITE]) + rq = dd->next_rq[WRITE]; else - drq = dd->next_drq[READ]; + rq = dd->next_rq[READ]; - if (drq) { + if (rq) { /* we have a "next request" */ - if (dd->last_sector != drq->request->sector) + if (dd->last_sector != rq->sector) /* end the batch on a non sequential request */ dd->batching += dd->fifo_batch; @@ -526,30 +304,33 @@ dispatch_find_request: if (deadline_check_fifo(dd, data_dir)) { /* An expired request exists - satisfy it */ dd->batching = 0; - drq = list_entry_fifo(dd->fifo_list[data_dir].next); + rq = rq_entry_fifo(dd->fifo_list[data_dir].next); - } else if (dd->next_drq[data_dir]) { + } else if (dd->next_rq[data_dir]) { /* * The last req was the same dir and we have a next request in * sort order. No expired requests so continue on from here. */ - drq = dd->next_drq[data_dir]; + rq = dd->next_rq[data_dir]; } else { + struct rb_node *node; /* * The last req was the other direction or we have run out of * higher-sectored requests. Go back to the lowest sectored * request (1 way elevator) and start a new batch. */ dd->batching = 0; - drq = deadline_find_first_drq(dd, data_dir); + node = rb_first(&dd->sort_list[data_dir]); + if (node) + rq = rb_entry_rq(node); } dispatch_request: /* - * drq is the selected appropriate request. + * rq is the selected appropriate request. */ dd->batching++; - deadline_move_request(dd, drq); + deadline_move_request(dd, rq); return 1; } @@ -562,30 +343,6 @@ static int deadline_queue_empty(request_queue_t *q) && list_empty(&dd->fifo_list[READ]); } -static struct request * -deadline_former_request(request_queue_t *q, struct request *rq) -{ - struct deadline_rq *drq = RQ_DATA(rq); - struct rb_node *rbprev = rb_prev(&drq->rb_node); - - if (rbprev) - return rb_entry_drq(rbprev)->request; - - return NULL; -} - -static struct request * -deadline_latter_request(request_queue_t *q, struct request *rq) -{ - struct deadline_rq *drq = RQ_DATA(rq); - struct rb_node *rbnext = rb_next(&drq->rb_node); - - if (rbnext) - return rb_entry_drq(rbnext)->request; - - return NULL; -} - static void deadline_exit_queue(elevator_t *e) { struct deadline_data *dd = e->elevator_data; @@ -593,46 +350,21 @@ static void deadline_exit_queue(elevator_t *e) BUG_ON(!list_empty(&dd->fifo_list[READ])); BUG_ON(!list_empty(&dd->fifo_list[WRITE])); - mempool_destroy(dd->drq_pool); - kfree(dd->hash); kfree(dd); } /* - * initialize elevator private data (deadline_data), and alloc a drq for - * each request on the free lists + * initialize elevator private data (deadline_data). */ static void *deadline_init_queue(request_queue_t *q, elevator_t *e) { struct deadline_data *dd; - int i; - - if (!drq_pool) - return NULL; dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); if (!dd) return NULL; memset(dd, 0, sizeof(*dd)); - dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!dd->hash) { - kfree(dd); - return NULL; - } - - dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, - mempool_free_slab, drq_pool, q->node); - if (!dd->drq_pool) { - kfree(dd->hash); - kfree(dd); - return NULL; - } - - for (i = 0; i < DL_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&dd->hash[i]); - INIT_LIST_HEAD(&dd->fifo_list[READ]); INIT_LIST_HEAD(&dd->fifo_list[WRITE]); dd->sort_list[READ] = RB_ROOT; @@ -645,39 +377,6 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e) return dd; } -static void deadline_put_request(request_queue_t *q, struct request *rq) -{ - struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq = RQ_DATA(rq); - - mempool_free(drq, dd->drq_pool); - rq->elevator_private = NULL; -} - -static int -deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) -{ - struct deadline_data *dd = q->elevator->elevator_data; - struct deadline_rq *drq; - - drq = mempool_alloc(dd->drq_pool, gfp_mask); - if (drq) { - memset(drq, 0, sizeof(*drq)); - RB_CLEAR_NODE(&drq->rb_node); - drq->request = rq; - - INIT_HLIST_NODE(&drq->hash); - - INIT_LIST_HEAD(&drq->fifo); - - rq->elevator_private = drq; - return 0; - } - - return 1; -} - /* * sysfs parts below */ @@ -757,10 +456,8 @@ static struct elevator_type iosched_deadline = { .elevator_dispatch_fn = deadline_dispatch_requests, .elevator_add_req_fn = deadline_add_request, .elevator_queue_empty_fn = deadline_queue_empty, - .elevator_former_req_fn = deadline_former_request, - .elevator_latter_req_fn = deadline_latter_request, - .elevator_set_req_fn = deadline_set_request, - .elevator_put_req_fn = deadline_put_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, .elevator_init_fn = deadline_init_queue, .elevator_exit_fn = deadline_exit_queue, }, @@ -772,24 +469,11 @@ static struct elevator_type iosched_deadline = { static int __init deadline_init(void) { - int ret; - - drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq), - 0, 0, NULL, NULL); - - if (!drq_pool) - return -ENOMEM; - - ret = elv_register(&iosched_deadline); - if (ret) - kmem_cache_destroy(drq_pool); - - return ret; + return elv_register(&iosched_deadline); } static void __exit deadline_exit(void) { - kmem_cache_destroy(drq_pool); elv_unregister(&iosched_deadline); } diff --git a/block/elevator.c b/block/elevator.c index 9b72dc7c8a5c..487dd3da8853 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -3,7 +3,7 @@ * * Copyright (C) 2000 Andrea Arcangeli SuSE * - * 30042000 Jens Axboe : + * 30042000 Jens Axboe : * * Split the elevator a bit so that it is possible to choose a different * one or even write a new "plug in". There are three pieces: @@ -33,12 +33,23 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); +/* + * Merge hash stuff. + */ +static const int elv_hash_shift = 6; +#define ELV_HASH_BLOCK(sec) ((sec) >> 3) +#define ELV_HASH_FN(sec) (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) +#define ELV_HASH_ENTRIES (1 << elv_hash_shift) +#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) +#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) + /* * can we safely merge with this request? */ @@ -56,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) /* * same device and no special stuff set, merge is ok */ - if (rq->rq_disk == bio->bi_bdev->bd_disk && - !rq->waiting && !rq->special) + if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special) return 1; return 0; @@ -151,27 +161,44 @@ __setup("elevator=", elevator_setup); static struct kobj_type elv_ktype; -static elevator_t *elevator_alloc(struct elevator_type *e) +static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e) { - elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL); - if (eq) { - memset(eq, 0, sizeof(*eq)); - eq->ops = &e->ops; - eq->elevator_type = e; - kobject_init(&eq->kobj); - snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); - eq->kobj.ktype = &elv_ktype; - mutex_init(&eq->sysfs_lock); - } else { - elevator_put(e); - } + elevator_t *eq; + int i; + + eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node); + if (unlikely(!eq)) + goto err; + + memset(eq, 0, sizeof(*eq)); + eq->ops = &e->ops; + eq->elevator_type = e; + kobject_init(&eq->kobj); + snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched"); + eq->kobj.ktype = &elv_ktype; + mutex_init(&eq->sysfs_lock); + + eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, + GFP_KERNEL, q->node); + if (!eq->hash) + goto err; + + for (i = 0; i < ELV_HASH_ENTRIES; i++) + INIT_HLIST_HEAD(&eq->hash[i]); + return eq; +err: + kfree(eq); + elevator_put(e); + return NULL; } static void elevator_release(struct kobject *kobj) { elevator_t *e = container_of(kobj, elevator_t, kobj); + elevator_put(e->elevator_type); + kfree(e->hash); kfree(e); } @@ -198,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name) e = elevator_get("noop"); } - eq = elevator_alloc(e); + eq = elevator_alloc(q, e); if (!eq) return -ENOMEM; @@ -212,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name) return ret; } +EXPORT_SYMBOL(elevator_init); + void elevator_exit(elevator_t *e) { mutex_lock(&e->sysfs_lock); @@ -223,10 +252,118 @@ void elevator_exit(elevator_t *e) kobject_put(&e->kobj); } +EXPORT_SYMBOL(elevator_exit); + +static inline void __elv_rqhash_del(struct request *rq) +{ + hlist_del_init(&rq->hash); +} + +static void elv_rqhash_del(request_queue_t *q, struct request *rq) +{ + if (ELV_ON_HASH(rq)) + __elv_rqhash_del(rq); +} + +static void elv_rqhash_add(request_queue_t *q, struct request *rq) +{ + elevator_t *e = q->elevator; + + BUG_ON(ELV_ON_HASH(rq)); + hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); +} + +static void elv_rqhash_reposition(request_queue_t *q, struct request *rq) +{ + __elv_rqhash_del(rq); + elv_rqhash_add(q, rq); +} + +static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset) +{ + elevator_t *e = q->elevator; + struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; + struct hlist_node *entry, *next; + struct request *rq; + + hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) { + BUG_ON(!ELV_ON_HASH(rq)); + + if (unlikely(!rq_mergeable(rq))) { + __elv_rqhash_del(rq); + continue; + } + + if (rq_hash_key(rq) == offset) + return rq; + } + + return NULL; +} + +/* + * RB-tree support functions for inserting/lookup/removal of requests + * in a sorted RB tree. + */ +struct request *elv_rb_add(struct rb_root *root, struct request *rq) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct request *__rq; + + while (*p) { + parent = *p; + __rq = rb_entry(parent, struct request, rb_node); + + if (rq->sector < __rq->sector) + p = &(*p)->rb_left; + else if (rq->sector > __rq->sector) + p = &(*p)->rb_right; + else + return __rq; + } + + rb_link_node(&rq->rb_node, parent, p); + rb_insert_color(&rq->rb_node, root); + return NULL; +} + +EXPORT_SYMBOL(elv_rb_add); + +void elv_rb_del(struct rb_root *root, struct request *rq) +{ + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); + rb_erase(&rq->rb_node, root); + RB_CLEAR_NODE(&rq->rb_node); +} + +EXPORT_SYMBOL(elv_rb_del); + +struct request *elv_rb_find(struct rb_root *root, sector_t sector) +{ + struct rb_node *n = root->rb_node; + struct request *rq; + + while (n) { + rq = rb_entry(n, struct request, rb_node); + + if (sector < rq->sector) + n = n->rb_left; + else if (sector > rq->sector) + n = n->rb_right; + else + return rq; + } + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_find); + /* * Insert rq into dispatch queue of q. Queue lock must be held on - * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be - * appended to the dispatch queue. To be used by specific elevators. + * entry. rq is sort insted into the dispatch queue. To be used by + * specific elevators. */ void elv_dispatch_sort(request_queue_t *q, struct request *rq) { @@ -235,6 +372,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) if (q->last_merge == rq) q->last_merge = NULL; + + elv_rqhash_del(q, rq); + q->nr_sorted--; boundary = q->end_sector; @@ -242,7 +382,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) + if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) break; if (rq->sector >= boundary) { if (pos->sector < boundary) @@ -258,11 +398,38 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq) list_add(&rq->queuelist, entry); } +EXPORT_SYMBOL(elv_dispatch_sort); + +/* + * Insert rq into dispatch queue of q. Queue lock must be held on + * entry. rq is added to the back of the dispatch queue. To be used by + * specific elevators. + */ +void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) +{ + if (q->last_merge == rq) + q->last_merge = NULL; + + elv_rqhash_del(q, rq); + + q->nr_sorted--; + + q->end_sector = rq_end_sector(rq); + q->boundary_rq = rq; + list_add_tail(&rq->queuelist, &q->queue_head); +} + +EXPORT_SYMBOL(elv_dispatch_add_tail); + int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; + struct request *__rq; int ret; + /* + * First try one-hit cache. + */ if (q->last_merge) { ret = elv_try_merge(q->last_merge, bio); if (ret != ELEVATOR_NO_MERGE) { @@ -271,18 +438,30 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) } } + /* + * See if our hash lookup can find a potential backmerge. + */ + __rq = elv_rqhash_find(q, bio->bi_sector); + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } + if (e->ops->elevator_merge_fn) return e->ops->elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } -void elv_merged_request(request_queue_t *q, struct request *rq) +void elv_merged_request(request_queue_t *q, struct request *rq, int type) { elevator_t *e = q->elevator; if (e->ops->elevator_merged_fn) - e->ops->elevator_merged_fn(q, rq); + e->ops->elevator_merged_fn(q, rq, type); + + if (type == ELEVATOR_BACK_MERGE) + elv_rqhash_reposition(q, rq); q->last_merge = rq; } @@ -294,8 +473,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq, if (e->ops->elevator_merge_req_fn) e->ops->elevator_merge_req_fn(q, rq, next); - q->nr_sorted--; + elv_rqhash_reposition(q, rq); + elv_rqhash_del(q, next); + + q->nr_sorted--; q->last_merge = rq; } @@ -313,7 +495,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) e->ops->elevator_deactivate_req_fn(q, rq); } - rq->flags &= ~REQ_STARTED; + rq->cmd_flags &= ~REQ_STARTED; elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); } @@ -344,13 +526,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) switch (where) { case ELEVATOR_INSERT_FRONT: - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_BACK: - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; elv_drain_elevator(q); list_add_tail(&rq->queuelist, &q->queue_head); /* @@ -369,10 +551,14 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) case ELEVATOR_INSERT_SORT: BUG_ON(!blk_fs_request(rq)); - rq->flags |= REQ_SORTED; + rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; - if (q->last_merge == NULL && rq_mergeable(rq)) - q->last_merge = rq; + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } + /* * Some ioscheds (cfq) run q->request_fn directly, so * rq cannot be accessed after calling @@ -387,7 +573,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) * insertion; otherwise, requests should be requeued * in ordseq order. */ - rq->flags |= REQ_SOFTBARRIER; + rq->cmd_flags |= REQ_SOFTBARRIER; if (q->ordseq == 0) { list_add(&rq->queuelist, &q->queue_head); @@ -429,9 +615,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { if (q->ordcolor) - rq->flags |= REQ_ORDERED_COLOR; + rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { + if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { /* * toggle ordered color */ @@ -452,7 +638,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } - } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) + } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) where = ELEVATOR_INSERT_BACK; if (plug) @@ -461,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, elv_insert(q, rq, where); } +EXPORT_SYMBOL(__elv_add_request); + void elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { @@ -471,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where, spin_unlock_irqrestore(q->queue_lock, flags); } +EXPORT_SYMBOL(elv_add_request); + static inline struct request *__elv_next_request(request_queue_t *q) { struct request *rq; @@ -493,7 +683,7 @@ struct request *elv_next_request(request_queue_t *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { - if (!(rq->flags & REQ_STARTED)) { + if (!(rq->cmd_flags & REQ_STARTED)) { elevator_t *e = q->elevator; /* @@ -510,7 +700,7 @@ struct request *elv_next_request(request_queue_t *q) * it, a request that has been delayed should * not be passed by new incoming requests */ - rq->flags |= REQ_STARTED; + rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); } @@ -519,7 +709,7 @@ struct request *elv_next_request(request_queue_t *q) q->boundary_rq = NULL; } - if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) + if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn) break; ret = q->prep_rq_fn(q, rq); @@ -541,7 +731,7 @@ struct request *elv_next_request(request_queue_t *q) nr_bytes = rq->data_len; blkdev_dequeue_request(rq); - rq->flags |= REQ_QUIET; + rq->cmd_flags |= REQ_QUIET; end_that_request_chunk(rq, 0, nr_bytes); end_that_request_last(rq, 0); } else { @@ -554,9 +744,12 @@ struct request *elv_next_request(request_queue_t *q) return rq; } +EXPORT_SYMBOL(elv_next_request); + void elv_dequeue_request(request_queue_t *q, struct request *rq) { BUG_ON(list_empty(&rq->queuelist)); + BUG_ON(ELV_ON_HASH(rq)); list_del_init(&rq->queuelist); @@ -569,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq) q->in_flight++; } +EXPORT_SYMBOL(elv_dequeue_request); + int elv_queue_empty(request_queue_t *q) { elevator_t *e = q->elevator; @@ -582,6 +777,8 @@ int elv_queue_empty(request_queue_t *q) return 1; } +EXPORT_SYMBOL(elv_queue_empty); + struct request *elv_latter_request(request_queue_t *q, struct request *rq) { elevator_t *e = q->elevator; @@ -600,13 +797,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq) return NULL; } -int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - gfp_t gfp_mask) +int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask) { elevator_t *e = q->elevator; if (e->ops->elevator_set_req_fn) - return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask); + return e->ops->elevator_set_req_fn(q, rq, gfp_mask); rq->elevator_private = NULL; return 0; @@ -620,12 +816,12 @@ void elv_put_request(request_queue_t *q, struct request *rq) e->ops->elevator_put_req_fn(q, rq); } -int elv_may_queue(request_queue_t *q, int rw, struct bio *bio) +int elv_may_queue(request_queue_t *q, int rw) { elevator_t *e = q->elevator; if (e->ops->elevator_may_queue_fn) - return e->ops->elevator_may_queue_fn(q, rw, bio); + return e->ops->elevator_may_queue_fn(q, rw); return ELV_MQUEUE_MAY; } @@ -792,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) /* * Allocate new elevator */ - e = elevator_alloc(new_e); + e = elevator_alloc(q, new_e); if (!e) return 0; @@ -908,11 +1104,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) return len; } -EXPORT_SYMBOL(elv_dispatch_sort); -EXPORT_SYMBOL(elv_add_request); -EXPORT_SYMBOL(__elv_add_request); -EXPORT_SYMBOL(elv_next_request); -EXPORT_SYMBOL(elv_dequeue_request); -EXPORT_SYMBOL(elv_queue_empty); -EXPORT_SYMBOL(elevator_exit); -EXPORT_SYMBOL(elevator_init); +struct request *elv_rb_former_request(request_queue_t *q, struct request *rq) +{ + struct rb_node *rbprev = rb_prev(&rq->rb_node); + + if (rbprev) + return rb_entry_rq(rbprev); + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_former_request); + +struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq) +{ + struct rb_node *rbnext = rb_next(&rq->rb_node); + + if (rbnext) + return rb_entry_rq(rbnext); + + return NULL; +} + +EXPORT_SYMBOL(elv_rb_latter_request); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 51dc0edf76e0..83425fb3c8db 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data); static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); static void init_request_from_bio(struct request *req, struct bio *bio); static int __make_request(request_queue_t *q, struct bio *bio); +static struct io_context *current_io_context(gfp_t gfp_flags, int node); /* * For the allocated request tables @@ -277,19 +278,19 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) EXPORT_SYMBOL(blk_queue_make_request); -static inline void rq_init(request_queue_t *q, struct request *rq) +static void rq_init(request_queue_t *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); rq->errors = 0; - rq->rq_status = RQ_ACTIVE; rq->bio = rq->biotail = NULL; + INIT_HLIST_NODE(&rq->hash); + RB_CLEAR_NODE(&rq->rb_node); rq->ioprio = 0; rq->buffer = NULL; rq->ref_count = 1; rq->q = q; - rq->waiting = NULL; rq->special = NULL; rq->data_len = 0; rq->data = NULL; @@ -382,8 +383,8 @@ unsigned blk_ordered_req_seq(struct request *rq) if (rq == &q->post_flush_rq) return QUEUE_ORDSEQ_POSTFLUSH; - if ((rq->flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) + if ((rq->cmd_flags & REQ_ORDERED_COLOR) == + (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) return QUEUE_ORDSEQ_DRAIN; else return QUEUE_ORDSEQ_DONE; @@ -446,11 +447,11 @@ static void queue_flush(request_queue_t *q, unsigned which) end_io = post_flush_end_io; } + rq->cmd_flags = REQ_HARDBARRIER; rq_init(q, rq); - rq->flags = REQ_HARDBARRIER; rq->elevator_private = NULL; + rq->elevator_private2 = NULL; rq->rq_disk = q->bar_rq.rq_disk; - rq->rl = NULL; rq->end_io = end_io; q->prepare_flush_fn(q, rq); @@ -471,11 +472,13 @@ static inline struct request *start_ordered(request_queue_t *q, blkdev_dequeue_request(rq); q->orig_bar_rq = rq; rq = &q->bar_rq; + rq->cmd_flags = 0; rq_init(q, rq); - rq->flags = bio_data_dir(q->orig_bar_rq->bio); - rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; + if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) + rq->cmd_flags |= REQ_RW; + rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; rq->elevator_private = NULL; - rq->rl = NULL; + rq->elevator_private2 = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; @@ -587,8 +590,8 @@ static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) return 0; } -static inline int ordered_bio_endio(struct request *rq, struct bio *bio, - unsigned int nbytes, int error) +static int ordered_bio_endio(struct request *rq, struct bio *bio, + unsigned int nbytes, int error) { request_queue_t *q = rq->q; bio_end_io_t *endio; @@ -1124,7 +1127,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq) } list_del_init(&rq->queuelist); - rq->flags &= ~REQ_QUEUED; + rq->cmd_flags &= ~REQ_QUEUED; rq->tag = -1; if (unlikely(bqt->tag_index[tag] == NULL)) @@ -1160,7 +1163,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq) struct blk_queue_tag *bqt = q->queue_tags; int tag; - if (unlikely((rq->flags & REQ_QUEUED))) { + if (unlikely((rq->cmd_flags & REQ_QUEUED))) { printk(KERN_ERR "%s: request %p for device [%s] already tagged %d", __FUNCTION__, rq, @@ -1168,13 +1171,18 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq) BUG(); } - tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); - if (tag >= bqt->max_depth) - return 1; + /* + * Protect against shared tag maps, as we may not have exclusive + * access to the tag map. + */ + do { + tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); + if (tag >= bqt->max_depth) + return 1; - __set_bit(tag, bqt->tag_map); + } while (test_and_set_bit(tag, bqt->tag_map)); - rq->flags |= REQ_QUEUED; + rq->cmd_flags |= REQ_QUEUED; rq->tag = tag; bqt->tag_index[tag] = rq; blkdev_dequeue_request(rq); @@ -1210,65 +1218,31 @@ void blk_queue_invalidate_tags(request_queue_t *q) printk(KERN_ERR "%s: bad tag found on list\n", __FUNCTION__); list_del_init(&rq->queuelist); - rq->flags &= ~REQ_QUEUED; + rq->cmd_flags &= ~REQ_QUEUED; } else blk_queue_end_tag(q, rq); - rq->flags &= ~REQ_STARTED; + rq->cmd_flags &= ~REQ_STARTED; __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); } } EXPORT_SYMBOL(blk_queue_invalidate_tags); -static const char * const rq_flags[] = { - "REQ_RW", - "REQ_FAILFAST", - "REQ_SORTED", - "REQ_SOFTBARRIER", - "REQ_HARDBARRIER", - "REQ_FUA", - "REQ_CMD", - "REQ_NOMERGE", - "REQ_STARTED", - "REQ_DONTPREP", - "REQ_QUEUED", - "REQ_ELVPRIV", - "REQ_PC", - "REQ_BLOCK_PC", - "REQ_SENSE", - "REQ_FAILED", - "REQ_QUIET", - "REQ_SPECIAL", - "REQ_DRIVE_CMD", - "REQ_DRIVE_TASK", - "REQ_DRIVE_TASKFILE", - "REQ_PREEMPT", - "REQ_PM_SUSPEND", - "REQ_PM_RESUME", - "REQ_PM_SHUTDOWN", - "REQ_ORDERED_COLOR", -}; - void blk_dump_rq_flags(struct request *rq, char *msg) { int bit; - printk("%s: dev %s: flags = ", msg, - rq->rq_disk ? rq->rq_disk->disk_name : "?"); - bit = 0; - do { - if (rq->flags & (1 << bit)) - printk("%s ", rq_flags[bit]); - bit++; - } while (bit < __REQ_NR_BITS); + printk("%s: dev %s: type=%x, flags=%x\n", msg, + rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, + rq->cmd_flags); printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); - if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { + if (blk_pc_request(rq)) { printk("cdb: "); for (bit = 0; bit < sizeof(rq->cmd); bit++) printk("%02x ", rq->cmd[bit]); @@ -1441,7 +1415,7 @@ static inline int ll_new_mergeable(request_queue_t *q, int nr_phys_segs = bio_phys_segments(q, bio); if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1464,7 +1438,7 @@ static inline int ll_new_hw_segment(request_queue_t *q, if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1491,7 +1465,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, max_sectors = q->max_sectors; if (req->nr_sectors + bio_sectors(bio) > max_sectors) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -1530,7 +1504,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req, if (req->nr_sectors + bio_sectors(bio) > max_sectors) { - req->flags |= REQ_NOMERGE; + req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; return 0; @@ -2029,14 +2003,13 @@ EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(request_queue_t *q, struct request *rq) { - if (rq->flags & REQ_ELVPRIV) + if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); } -static inline struct request * -blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, - int priv, gfp_t gfp_mask) +static struct request * +blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -2044,17 +2017,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, return NULL; /* - * first three bits are identical in rq->flags and bio->bi_rw, + * first three bits are identical in rq->cmd_flags and bio->bi_rw, * see bio.h and blkdev.h */ - rq->flags = rw; + rq->cmd_flags = rw | REQ_ALLOCED; if (priv) { - if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { + if (unlikely(elv_set_request(q, rq, gfp_mask))) { mempool_free(rq, q->rq.rq_pool); return NULL; } - rq->flags |= REQ_ELVPRIV; + rq->cmd_flags |= REQ_ELVPRIV; } return rq; @@ -2141,13 +2114,13 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, struct io_context *ioc = NULL; int may_queue, priv; - may_queue = elv_may_queue(q, rw, bio); + may_queue = elv_may_queue(q, rw); if (may_queue == ELV_MQUEUE_NO) goto rq_starved; if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { if (rl->count[rw]+1 >= q->nr_requests) { - ioc = current_io_context(GFP_ATOMIC); + ioc = current_io_context(GFP_ATOMIC, q->node); /* * The queue will fill after this allocation, so set * it as full, and mark this process as "batching". @@ -2189,7 +2162,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); + rq = blk_alloc_request(q, rw, priv, gfp_mask); if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. Undo anything @@ -2225,7 +2198,6 @@ rq_starved: ioc->nr_batch_requests--; rq_init(q, rq); - rq->rl = rl; blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: @@ -2268,7 +2240,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw, * up to a big batch of them for a small period time. * See ioc_batching, ioc_set_batching */ - ioc = current_io_context(GFP_NOIO); + ioc = current_io_context(GFP_NOIO, q->node); ioc_set_batching(q, ioc); spin_lock_irq(q->queue_lock); @@ -2299,6 +2271,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) } EXPORT_SYMBOL(blk_get_request); +/** + * blk_start_queueing - initiate dispatch of requests to device + * @q: request queue to kick into gear + * + * This is basically a helper to remove the need to know whether a queue + * is plugged or not if someone just wants to initiate dispatch of requests + * for this queue. + * + * The queue lock must be held with interrupts disabled. + */ +void blk_start_queueing(request_queue_t *q) +{ + if (!blk_queue_plugged(q)) + q->request_fn(q); + else + __generic_unplug_device(q); +} +EXPORT_SYMBOL(blk_start_queueing); + /** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted @@ -2351,7 +2342,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq, * must not attempt merges on this) and that it acts as a soft * barrier */ - rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER; + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = data; @@ -2365,11 +2357,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq, drive_stat_acct(rq, rq->nr_sectors, 1); __elv_add_request(q, rq, where, 0); - - if (blk_queue_plugged(q)) - __generic_unplug_device(q); - else - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -2558,7 +2546,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; rq->rq_disk = bd_disk; - rq->flags |= REQ_NOMERGE; + rq->cmd_flags |= REQ_NOMERGE; rq->end_io = done; WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); @@ -2598,10 +2586,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, rq->sense_len = 0; } - rq->waiting = &wait; + rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); wait_for_completion(&wait); - rq->waiting = NULL; if (rq->errors) err = -EIO; @@ -2710,8 +2697,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats); */ void __blk_put_request(request_queue_t *q, struct request *req) { - struct request_list *rl = req->rl; - if (unlikely(!q)) return; if (unlikely(--req->ref_count)) @@ -2719,18 +2704,16 @@ void __blk_put_request(request_queue_t *q, struct request *req) elv_completed_request(q, req); - req->rq_status = RQ_INACTIVE; - req->rl = NULL; - /* * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools */ - if (rl) { + if (req->cmd_flags & REQ_ALLOCED) { int rw = rq_data_dir(req); - int priv = req->flags & REQ_ELVPRIV; + int priv = req->cmd_flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); + BUG_ON(!hlist_unhashed(&req->hash)); blk_free_request(q, req); freed_request(q, rw, priv); @@ -2764,9 +2747,9 @@ EXPORT_SYMBOL(blk_put_request); */ void blk_end_sync_rq(struct request *rq, int error) { - struct completion *waiting = rq->waiting; + struct completion *waiting = rq->end_io_data; - rq->waiting = NULL; + rq->end_io_data = NULL; __blk_put_request(rq->q, rq); /* @@ -2829,7 +2812,7 @@ static int attempt_merge(request_queue_t *q, struct request *req, if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk - || next->waiting || next->special) + || next->special) return 0; /* @@ -2890,22 +2873,24 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq) static void init_request_from_bio(struct request *req, struct bio *bio) { - req->flags |= REQ_CMD; + req->cmd_type = REQ_TYPE_FS; /* * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) */ if (bio_rw_ahead(bio) || bio_failfast(bio)) - req->flags |= REQ_FAILFAST; + req->cmd_flags |= REQ_FAILFAST; /* * REQ_BARRIER implies no merging, but lets make it explicit */ if (unlikely(bio_barrier(bio))) - req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); if (bio_sync(bio)) - req->flags |= REQ_RW_SYNC; + req->cmd_flags |= REQ_RW_SYNC; + if (bio_rw_meta(bio)) + req->cmd_flags |= REQ_RW_META; req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; @@ -2914,7 +2899,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio) req->nr_phys_segments = bio_phys_segments(req->q, bio); req->nr_hw_segments = bio_hw_segments(req->q, bio); req->buffer = bio_data(bio); /* see ->buffer comment above */ - req->waiting = NULL; req->bio = req->biotail = bio; req->ioprio = bio_prio(bio); req->rq_disk = bio->bi_bdev->bd_disk; @@ -2924,17 +2908,11 @@ static void init_request_from_bio(struct request *req, struct bio *bio) static int __make_request(request_queue_t *q, struct bio *bio) { struct request *req; - int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync; - unsigned short prio; - sector_t sector; + int el_ret, nr_sectors, barrier, err; + const unsigned short prio = bio_prio(bio); + const int sync = bio_sync(bio); - sector = bio->bi_sector; nr_sectors = bio_sectors(bio); - cur_nr_sectors = bio_cur_sectors(bio); - prio = bio_prio(bio); - - rw = bio_data_dir(bio); - sync = bio_sync(bio); /* * low level driver can indicate that it wants pages above a @@ -2943,8 +2921,6 @@ static int __make_request(request_queue_t *q, struct bio *bio) */ blk_queue_bounce(q, &bio); - spin_lock_prefetch(q->queue_lock); - barrier = bio_barrier(bio); if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { err = -EOPNOTSUPP; @@ -2972,7 +2948,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_back_merge(q, req)) - elv_merged_request(q, req); + elv_merged_request(q, req, el_ret); goto out; case ELEVATOR_FRONT_MERGE: @@ -2992,14 +2968,14 @@ static int __make_request(request_queue_t *q, struct bio *bio) * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->current_nr_sectors = cur_nr_sectors; - req->hard_cur_sectors = cur_nr_sectors; - req->sector = req->hard_sector = sector; + req->current_nr_sectors = bio_cur_sectors(bio); + req->hard_cur_sectors = req->current_nr_sectors; + req->sector = req->hard_sector = bio->bi_sector; req->nr_sectors = req->hard_nr_sectors += nr_sectors; req->ioprio = ioprio_best(req->ioprio, prio); drive_stat_acct(req, nr_sectors, 0); if (!attempt_front_merge(q, req)) - elv_merged_request(q, req); + elv_merged_request(q, req, el_ret); goto out; /* ELV_NO_MERGE: elevator says don't/can't merge. */ @@ -3012,7 +2988,7 @@ get_rq: * Grab a free request. This is might sleep but can not fail. * Returns with the queue unlocked. */ - req = get_request_wait(q, rw, bio); + req = get_request_wait(q, bio_data_dir(bio), bio); /* * After dropping the lock and possibly sleeping here, our request @@ -3306,7 +3282,7 @@ static int __end_that_request_first(struct request *req, int uptodate, req->errors = 0; if (!uptodate) { - if (blk_fs_request(req) && !(req->flags & REQ_QUIET)) + if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET)) printk("end_request: I/O error, dev %s, sector %llu\n", req->rq_disk ? req->rq_disk->disk_name : "?", (unsigned long long)req->sector); @@ -3569,8 +3545,8 @@ EXPORT_SYMBOL(end_request); void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio) { - /* first two bits are identical in rq->flags and bio->bi_rw */ - rq->flags |= (bio->bi_rw & 3); + /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ + rq->cmd_flags |= (bio->bi_rw & 3); rq->nr_phys_segments = bio_phys_segments(q, bio); rq->nr_hw_segments = bio_hw_segments(q, bio); @@ -3658,25 +3634,22 @@ EXPORT_SYMBOL(put_io_context); /* Called by the exitting task */ void exit_io_context(void) { - unsigned long flags; struct io_context *ioc; struct cfq_io_context *cic; - local_irq_save(flags); task_lock(current); ioc = current->io_context; current->io_context = NULL; - ioc->task = NULL; task_unlock(current); - local_irq_restore(flags); + ioc->task = NULL; if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); if (ioc->cic_root.rb_node != NULL) { cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node); cic->exit(ioc); } - + put_io_context(ioc); } @@ -3688,7 +3661,7 @@ void exit_io_context(void) * but since the current task itself holds a reference, the context can be * used in general code, so long as it stays within `current` context. */ -struct io_context *current_io_context(gfp_t gfp_flags) +static struct io_context *current_io_context(gfp_t gfp_flags, int node) { struct task_struct *tsk = current; struct io_context *ret; @@ -3697,11 +3670,11 @@ struct io_context *current_io_context(gfp_t gfp_flags) if (likely(ret)) return ret; - ret = kmem_cache_alloc(iocontext_cachep, gfp_flags); + ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); if (ret) { atomic_set(&ret->refcount, 1); ret->task = current; - ret->set_ioprio = NULL; + ret->ioprio_changed = 0; ret->last_waited = jiffies; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ ret->aic = NULL; @@ -3721,10 +3694,10 @@ EXPORT_SYMBOL(current_io_context); * * This is always called in the context of the task which submitted the I/O. */ -struct io_context *get_io_context(gfp_t gfp_flags) +struct io_context *get_io_context(gfp_t gfp_flags, int node) { struct io_context *ret; - ret = current_io_context(gfp_flags); + ret = current_io_context(gfp_flags, node); if (likely(ret)) atomic_inc(&ret->refcount); return ret; @@ -3837,9 +3810,6 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) ssize_t ret = queue_var_store(&ra_kb, page, count); spin_lock_irq(q->queue_lock); - if (ra_kb > (q->max_sectors >> 1)) - ra_kb = (q->max_sectors >> 1); - q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); spin_unlock_irq(q->queue_lock); diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 56a7c620574f..79af43179421 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e) { struct noop_data *nd; - nd = kmalloc(sizeof(*nd), GFP_KERNEL); + nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node); if (!nd) return NULL; INIT_LIST_HEAD(&nd->queue); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index b33eda26e205..2dc326421a24 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q, rq->sense = sense; rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; bio = rq->bio; /* @@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q, memset(sense, 0, sizeof(sense)); rq->sense = sense; rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; blk_execute_rq(q, disk, rq, 0); @@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c int err; rq = blk_get_request(q, WRITE, __GFP_WAIT); - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; rq->data_len = 0; rq->timeout = BLK_DEFAULT_TIMEOUT; diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index a360215dbce7..2568640430fb 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -3331,7 +3331,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_ Command->DmaDirection = PCI_DMA_TODEVICE; Command->CommandType = DAC960_WriteCommand; } - Command->Completion = Request->waiting; + Command->Completion = Request->end_io_data; Command->LogicalDriveNumber = (long)Request->rq_disk->private_data; Command->BlockNumber = Request->sector; Command->BlockCount = Request->nr_sectors; diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index b5382cedf0c0..422e31d5f8e5 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -2,6 +2,8 @@ # Block device driver configuration # +if BLOCK + menu "Block devices" config BLK_DEV_FD @@ -468,3 +470,5 @@ config ATA_OVER_ETH devices like the Coraid EtherDrive (R) Storage Blade. endmenu + +endif diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 2cd3391ff878..c211065ad829 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1229,7 +1229,6 @@ static inline void complete_buffers(struct bio *bio, int status) int nr_sectors = bio_sectors(bio); bio->bi_next = NULL; - blk_finished_io(len); bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO); bio = xbh; } diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 78082edc14b4..4abc193314ee 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -989,7 +989,6 @@ static inline void complete_buffers(struct bio *bio, int ok) xbh = bio->bi_next; bio->bi_next = NULL; - blk_finished_io(nr_sectors); bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO); bio = xbh; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ad1d7065a1b2..629c5769d994 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2991,8 +2991,8 @@ static void do_fd_request(request_queue_t * q) if (usage_count == 0) { printk("warning: usage count=0, current_req=%p exiting\n", current_req); - printk("sect=%ld flags=%lx\n", (long)current_req->sector, - current_req->flags); + printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector, + current_req->cmd_type, current_req->cmd_flags); return; } if (test_bit(0, &fdc_busy)) { diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 68b0471ad5a6..d6bb8da955a2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include /* for invalidate_bdev() */ @@ -1165,6 +1166,162 @@ static int lo_ioctl(struct inode * inode, struct file * file, return err; } +#ifdef CONFIG_COMPAT +struct compat_loop_info { + compat_int_t lo_number; /* ioctl r/o */ + compat_dev_t lo_device; /* ioctl r/o */ + compat_ulong_t lo_inode; /* ioctl r/o */ + compat_dev_t lo_rdevice; /* ioctl r/o */ + compat_int_t lo_offset; + compat_int_t lo_encrypt_type; + compat_int_t lo_encrypt_key_size; /* ioctl w/o */ + compat_int_t lo_flags; /* ioctl r/o */ + char lo_name[LO_NAME_SIZE]; + unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ + compat_ulong_t lo_init[2]; + char reserved[4]; +}; + +/* + * Transfer 32-bit compatibility structure in userspace to 64-bit loop info + * - noinlined to reduce stack space usage in main part of driver + */ +static noinline int +loop_info64_from_compat(const struct compat_loop_info *arg, + struct loop_info64 *info64) +{ + struct compat_loop_info info; + + if (copy_from_user(&info, arg, sizeof(info))) + return -EFAULT; + + memset(info64, 0, sizeof(*info64)); + info64->lo_number = info.lo_number; + info64->lo_device = info.lo_device; + info64->lo_inode = info.lo_inode; + info64->lo_rdevice = info.lo_rdevice; + info64->lo_offset = info.lo_offset; + info64->lo_sizelimit = 0; + info64->lo_encrypt_type = info.lo_encrypt_type; + info64->lo_encrypt_key_size = info.lo_encrypt_key_size; + info64->lo_flags = info.lo_flags; + info64->lo_init[0] = info.lo_init[0]; + info64->lo_init[1] = info.lo_init[1]; + if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) + memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); + else + memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); + memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); + return 0; +} + +/* + * Transfer 64-bit loop info to 32-bit compatibility structure in userspace + * - noinlined to reduce stack space usage in main part of driver + */ +static noinline int +loop_info64_to_compat(const struct loop_info64 *info64, + struct compat_loop_info __user *arg) +{ + struct compat_loop_info info; + + memset(&info, 0, sizeof(info)); + info.lo_number = info64->lo_number; + info.lo_device = info64->lo_device; + info.lo_inode = info64->lo_inode; + info.lo_rdevice = info64->lo_rdevice; + info.lo_offset = info64->lo_offset; + info.lo_encrypt_type = info64->lo_encrypt_type; + info.lo_encrypt_key_size = info64->lo_encrypt_key_size; + info.lo_flags = info64->lo_flags; + info.lo_init[0] = info64->lo_init[0]; + info.lo_init[1] = info64->lo_init[1]; + if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) + memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); + else + memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); + memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); + + /* error in case values were truncated */ + if (info.lo_device != info64->lo_device || + info.lo_rdevice != info64->lo_rdevice || + info.lo_inode != info64->lo_inode || + info.lo_offset != info64->lo_offset || + info.lo_init[0] != info64->lo_init[0] || + info.lo_init[1] != info64->lo_init[1]) + return -EOVERFLOW; + + if (copy_to_user(arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +static int +loop_set_status_compat(struct loop_device *lo, + const struct compat_loop_info __user *arg) +{ + struct loop_info64 info64; + int ret; + + ret = loop_info64_from_compat(arg, &info64); + if (ret < 0) + return ret; + return loop_set_status(lo, &info64); +} + +static int +loop_get_status_compat(struct loop_device *lo, + struct compat_loop_info __user *arg) +{ + struct loop_info64 info64; + int err = 0; + + if (!arg) + err = -EINVAL; + if (!err) + err = loop_get_status(lo, &info64); + if (!err) + err = loop_info64_to_compat(&info64, arg); + return err; +} + +static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + struct loop_device *lo = inode->i_bdev->bd_disk->private_data; + int err; + + lock_kernel(); + switch(cmd) { + case LOOP_SET_STATUS: + mutex_lock(&lo->lo_ctl_mutex); + err = loop_set_status_compat( + lo, (const struct compat_loop_info __user *) arg); + mutex_unlock(&lo->lo_ctl_mutex); + break; + case LOOP_GET_STATUS: + mutex_lock(&lo->lo_ctl_mutex); + err = loop_get_status_compat( + lo, (struct compat_loop_info __user *) arg); + mutex_unlock(&lo->lo_ctl_mutex); + break; + case LOOP_CLR_FD: + case LOOP_GET_STATUS64: + case LOOP_SET_STATUS64: + arg = (unsigned long) compat_ptr(arg); + case LOOP_SET_FD: + case LOOP_CHANGE_FD: + err = lo_ioctl(inode, file, cmd, arg); + break; + default: + err = -ENOIOCTLCMD; + break; + } + unlock_kernel(); + return err; +} +#endif + static int lo_open(struct inode *inode, struct file *file) { struct loop_device *lo = inode->i_bdev->bd_disk->private_data; @@ -1192,6 +1349,9 @@ static struct block_device_operations lo_fops = { .open = lo_open, .release = lo_release, .ioctl = lo_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = lo_compat_ioctl, +#endif }; /* diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index bdbade9a5cf5..9d1035e8d9d8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -407,10 +407,10 @@ static void do_nbd_request(request_queue_t * q) struct nbd_device *lo; blkdev_dequeue_request(req); - dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n", - req->rq_disk->disk_name, req, req->flags); + dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", + req->rq_disk->disk_name, req, req->cmd_type); - if (!(req->flags & REQ_CMD)) + if (!blk_fs_request(req)) goto error_out; lo = req->rq_disk->private_data; @@ -489,7 +489,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, switch (cmd) { case NBD_DISCONNECT: printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); - sreq.flags = REQ_SPECIAL; + sreq.cmd_type = REQ_TYPE_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; /* * Set these to sane values in case server implementation diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 2403721f9db1..38578b9dbfd1 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -437,7 +437,7 @@ static char *pd_buf; /* buffer for request in progress */ static enum action do_pd_io_start(void) { - if (pd_req->flags & REQ_SPECIAL) { + if (blk_special_request(pd_req)) { phase = pd_special; return pd_special(); } @@ -719,14 +719,12 @@ static int pd_special_command(struct pd_unit *disk, memset(&rq, 0, sizeof(rq)); rq.errors = 0; - rq.rq_status = RQ_ACTIVE; rq.rq_disk = disk->gd; rq.ref_count = 1; - rq.waiting = &wait; + rq.end_io_data = &wait; rq.end_io = blk_end_sync_rq; blk_insert_request(disk->gd->queue, &rq, 0, func); wait_for_completion(&wait); - rq.waiting = NULL; if (rq.errors) err = -EIO; blk_put_request(&rq); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 451b996bba91..888d1aceeeff 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -365,17 +365,17 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->sense = sense; memset(sense, 0, sizeof(sense)); rq->sense_len = 0; - rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER; + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_flags |= REQ_HARDBARRIER; if (cgc->quiet) - rq->flags |= REQ_QUIET; + rq->cmd_flags |= REQ_QUIET; memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); rq->ref_count++; - rq->flags |= REQ_NOMERGE; - rq->waiting = &wait; + rq->end_io_data = &wait; rq->end_io = blk_end_sync_rq; elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); generic_unplug_device(q); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index cc42e762396f..f2305ee792a1 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -319,8 +319,8 @@ static void start_request(struct floppy_state *fs) printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n", req->rq_disk->disk_name, req->cmd, (long)req->sector, req->nr_sectors, req->buffer); - printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n", - req->rq_status, req->errors, req->current_nr_sectors); + printk(" errors=%d current_nr_sectors=%ld\n", + req->errors, req->current_nr_sectors); #endif if (req->sector < 0 || req->sector >= fs->total_secs) { diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c index 89e3c2f8b776..dfda796eba56 100644 --- a/drivers/block/swim_iop.c +++ b/drivers/block/swim_iop.c @@ -529,8 +529,8 @@ static void start_request(struct floppy_state *fs) printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n", CURRENT->rq_disk->disk_name, CURRENT->cmd, CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer); - printk(" rq_status=%d errors=%d current_nr_sectors=%ld\n", - CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors); + printk(" errors=%d current_nr_sectors=%ld\n", + CURRENT->errors, CURRENT->current_nr_sectors); #endif if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) { diff --git a/drivers/block/xd.c b/drivers/block/xd.c index e828e4cbd3e1..ebf3025721d1 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -313,7 +313,7 @@ static void do_xd_request (request_queue_t * q) int res = 0; int retry; - if (!(req->flags & REQ_CMD)) { + if (!blk_fs_request(req)) { end_request(req, 0); continue; } diff --git a/drivers/cdrom/Kconfig b/drivers/cdrom/Kconfig index ff5652d40619..4b12e9031fb3 100644 --- a/drivers/cdrom/Kconfig +++ b/drivers/cdrom/Kconfig @@ -3,7 +3,7 @@ # menu "Old CD-ROM drivers (not SCSI, not IDE)" - depends on ISA + depends on ISA && BLOCK config CD_NO_IDESCSI bool "Support non-SCSI/IDE/ATAPI CDROM drives" diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index d239cf8b20bd..b38c84a7a8e3 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2129,7 +2129,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, rq->cmd[9] = 0xf8; rq->cmd_len = 12; - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->timeout = 60 * HZ; bio = rq->bio; diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c index 37bdb0163f0d..ccd91c1a84bd 100644 --- a/drivers/cdrom/cdu31a.c +++ b/drivers/cdrom/cdu31a.c @@ -1338,8 +1338,10 @@ static void do_cdu31a_request(request_queue_t * q) } /* WTF??? */ - if (!(req->flags & REQ_CMD)) + if (!blk_fs_request(req)) { + end_request(req, 0); continue; + } if (rq_data_dir(req) == WRITE) { end_request(req, 0); continue; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 4cc619edf424..bde1c665d9f4 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -1006,6 +1006,7 @@ config GPIO_VR41XX config RAW_DRIVER tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)" + depends on BLOCK help The raw driver permits block devices to be bound to /dev/raw/rawN. Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. diff --git a/drivers/char/random.c b/drivers/char/random.c index 4c3a5ca9d8f7..b430a12eb819 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -655,6 +655,7 @@ void add_interrupt_randomness(int irq) add_timer_randomness(irq_timer_state[irq], 0x100 + irq); } +#ifdef CONFIG_BLOCK void add_disk_randomness(struct gendisk *disk) { if (!disk || !disk->random) @@ -667,6 +668,7 @@ void add_disk_randomness(struct gendisk *disk) } EXPORT_SYMBOL(add_disk_randomness); +#endif #define EXTRACT_SIZE 10 @@ -918,6 +920,7 @@ void rand_initialize_irq(int irq) } } +#ifdef CONFIG_BLOCK void rand_initialize_disk(struct gendisk *disk) { struct timer_rand_state *state; @@ -932,6 +935,7 @@ void rand_initialize_disk(struct gendisk *disk) disk->random = state; } } +#endif static ssize_t random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) diff --git a/drivers/fc4/fc.c b/drivers/fc4/fc.c index 1a159e8843ca..22d17474755f 100644 --- a/drivers/fc4/fc.c +++ b/drivers/fc4/fc.c @@ -974,7 +974,6 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt) */ fc->rst_pkt->device->host->eh_action = &sem; - fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY; fc->rst_pkt->done = fcp_scsi_reset_done; diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index b6fb167e20f6..69d627bd537a 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -4,6 +4,8 @@ # Andre Hedrick # +if BLOCK + menu "ATA/ATAPI/MFM/RLL support" config IDE @@ -1082,3 +1084,5 @@ config BLK_DEV_HD endif endmenu + +endif diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 654d4cd09847..69bbb6206a00 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -372,7 +372,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq, { int log = 0; - if (!sense || !rq || (rq->flags & REQ_QUIET)) + if (!sense || !rq || (rq->cmd_flags & REQ_QUIET)) return 0; switch (sense->sense_key) { @@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq) struct cdrom_info *cd = drive->driver_data; ide_init_drive_cmd(rq); - rq->flags = REQ_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->rq_disk = cd->disk; } @@ -617,7 +617,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, rq->cmd[0] = GPCMD_REQUEST_SENSE; rq->cmd[4] = rq->data_len = 18; - rq->flags = REQ_SENSE; + rq->cmd_type = REQ_TYPE_SENSE; /* NOTE! Save the failed command in "rq->buffer" */ rq->buffer = (void *) failed_command; @@ -630,10 +630,10 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate) struct request *rq = HWGROUP(drive)->rq; int nsectors = rq->hard_cur_sectors; - if ((rq->flags & REQ_SENSE) && uptodate) { + if (blk_sense_request(rq) && uptodate) { /* - * For REQ_SENSE, "rq->buffer" points to the original failed - * request + * For REQ_TYPE_SENSE, "rq->buffer" points to the original + * failed request */ struct request *failed = (struct request *) rq->buffer; struct cdrom_info *info = drive->driver_data; @@ -706,17 +706,17 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) return 1; } - if (rq->flags & REQ_SENSE) { + if (blk_sense_request(rq)) { /* We got an error trying to get sense info from the drive (probably while trying to recover from a former error). Just give up. */ - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); ide_error(drive, "request sense failure", stat); return 1; - } else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) { + } else if (blk_pc_request(rq)) { /* All other functions, except for READ. */ unsigned long flags; @@ -724,7 +724,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) * if we have an error, pass back CHECK_CONDITION as the * scsi status byte */ - if ((rq->flags & REQ_BLOCK_PC) && !rq->errors) + if (!rq->errors) rq->errors = SAM_STAT_CHECK_CONDITION; /* Check for tray open. */ @@ -735,12 +735,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) cdrom_saw_media_change (drive); /*printk("%s: media changed\n",drive->name);*/ return 0; - } else if (!(rq->flags & REQ_QUIET)) { + } else if (!(rq->cmd_flags & REQ_QUIET)) { /* Otherwise, print an error. */ ide_dump_status(drive, "packet command error", stat); } - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; /* * instead of playing games with moving completions around, @@ -881,7 +881,7 @@ static int cdrom_timer_expiry(ide_drive_t *drive) wait = ATAPI_WAIT_PC; break; default: - if (!(rq->flags & REQ_QUIET)) + if (!(rq->cmd_flags & REQ_QUIET)) printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]); wait = 0; break; @@ -1124,7 +1124,7 @@ static ide_startstop_t cdrom_read_intr (ide_drive_t *drive) if (rq->current_nr_sectors > 0) { printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n", drive->name, rq->current_nr_sectors); - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); } else cdrom_end_request(drive, 1); @@ -1456,7 +1456,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive) printk ("%s: cdrom_pc_intr: data underrun %d\n", drive->name, pc->buflen); */ - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); } return ide_stopped; @@ -1509,7 +1509,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive) rq->data += thislen; rq->data_len -= thislen; - if (rq->flags & REQ_SENSE) + if (blk_sense_request(rq)) rq->sense_len += thislen; } else { confused: @@ -1517,7 +1517,7 @@ confused: "appears confused (ireason = 0x%02x). " "Trying to recover by ending request.\n", drive->name, ireason); - rq->flags |= REQ_FAILED; + rq->cmd_flags |= REQ_FAILED; cdrom_end_request(drive, 0); return ide_stopped; } @@ -1546,7 +1546,7 @@ static ide_startstop_t cdrom_do_packet_command (ide_drive_t *drive) struct cdrom_info *info = drive->driver_data; info->dma = 0; - rq->flags &= ~REQ_FAILED; + rq->cmd_flags &= ~REQ_FAILED; len = rq->data_len; /* Start sending the command to the drive. */ @@ -1558,7 +1558,7 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq) { struct request_sense sense; int retries = 10; - unsigned int flags = rq->flags; + unsigned int flags = rq->cmd_flags; if (rq->sense == NULL) rq->sense = &sense; @@ -1567,14 +1567,14 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq) do { int error; unsigned long time = jiffies; - rq->flags = flags; + rq->cmd_flags = flags; error = ide_do_drive_cmd(drive, rq, ide_wait); time = jiffies - time; /* FIXME: we should probably abort/retry or something * in case of failure */ - if (rq->flags & REQ_FAILED) { + if (rq->cmd_flags & REQ_FAILED) { /* The request failed. Retry if it was due to a unit attention status (usually means media was changed). */ @@ -1596,10 +1596,10 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq) } /* End of retry loop. */ - } while ((rq->flags & REQ_FAILED) && retries >= 0); + } while ((rq->cmd_flags & REQ_FAILED) && retries >= 0); /* Return an error if the command failed. */ - return (rq->flags & REQ_FAILED) ? -EIO : 0; + return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0; } /* @@ -1963,7 +1963,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) { struct cdrom_info *info = drive->driver_data; - rq->flags |= REQ_QUIET; + rq->cmd_flags |= REQ_QUIET; info->dma = 0; @@ -2023,11 +2023,11 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block) } info->last_block = block; return action; - } else if (rq->flags & (REQ_PC | REQ_SENSE)) { + } else if (rq->cmd_type == REQ_TYPE_SENSE) { return cdrom_do_packet_command(drive); - } else if (rq->flags & REQ_BLOCK_PC) { + } else if (blk_pc_request(rq)) { return cdrom_do_block_pc(drive, rq); - } else if (rq->flags & REQ_SPECIAL) { + } else if (blk_special_request(rq)) { /* * right now this can only be a reset... */ @@ -2105,7 +2105,7 @@ static int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) req.sense = sense; req.cmd[0] = GPCMD_TEST_UNIT_READY; - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; #if ! STANDARD_ATAPI /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to @@ -2207,7 +2207,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, req.cmd[0] = GPCMD_READ_CDVD_CAPACITY; req.data = (char *)&capbuf; req.data_len = sizeof(capbuf); - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; stat = cdrom_queue_packet_command(drive, &req); if (stat == 0) { @@ -2230,7 +2230,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag, req.sense = sense; req.data = buf; req.data_len = buflen; - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; req.cmd[6] = trackno; req.cmd[7] = (buflen >> 8); @@ -2531,7 +2531,7 @@ static int ide_cdrom_packet(struct cdrom_device_info *cdi, req.timeout = cgc->timeout; if (cgc->quiet) - req.flags |= REQ_QUIET; + req.cmd_flags |= REQ_QUIET; req.sense = cgc->sense; cgc->stat = cdrom_queue_packet_command(drive, &req); @@ -2629,7 +2629,8 @@ int ide_cdrom_reset (struct cdrom_device_info *cdi) int ret; cdrom_prepare_request(drive, &req); - req.flags = REQ_SPECIAL | REQ_QUIET; + req.cmd_type = REQ_TYPE_SPECIAL; + req.cmd_flags = REQ_QUIET; ret = ide_do_drive_cmd(drive, &req, ide_wait); /* @@ -3116,9 +3117,9 @@ static int ide_cdrom_prep_pc(struct request *rq) static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq) { - if (rq->flags & REQ_CMD) + if (blk_fs_request(rq)) return ide_cdrom_prep_fs(q, rq); - else if (rq->flags & REQ_BLOCK_PC) + else if (blk_pc_request(rq)) return ide_cdrom_prep_pc(rq); return 0; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7cf3eb023521..0a05a377d66a 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -699,7 +699,8 @@ static void idedisk_prepare_flush(request_queue_t *q, struct request *rq) rq->cmd[0] = WIN_FLUSH_CACHE; - rq->flags |= REQ_DRIVE_TASK; + rq->cmd_type = REQ_TYPE_ATA_TASK; + rq->cmd_flags |= REQ_SOFTBARRIER; rq->buffer = rq->cmd; } @@ -740,7 +741,7 @@ static int set_multcount(ide_drive_t *drive, int arg) if (drive->special.b.set_multmode) return -EBUSY; ide_init_drive_cmd (&rq); - rq.flags = REQ_DRIVE_CMD; + rq.cmd_type = REQ_TYPE_ATA_CMD; drive->mult_req = arg; drive->special.b.set_multmode = 1; (void) ide_do_drive_cmd (drive, &rq, ide_wait); diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 7c3a13e1cf64..c3546fe9af63 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -205,7 +205,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq) ide_hwif_t *hwif = HWIF(drive); struct scatterlist *sg = hwif->sg_table; - BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256); + BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256); ide_map_sg(drive, rq); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index adbe9f76a505..8ccee9c769f8 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -588,7 +588,7 @@ static int idefloppy_do_end_request(ide_drive_t *drive, int uptodate, int nsecs) /* Why does this happen? */ if (!rq) return 0; - if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) { + if (!blk_special_request(rq)) { /* our real local end request function */ ide_end_request(drive, uptodate, nsecs); return 0; @@ -689,7 +689,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc ide_init_drive_cmd(rq); rq->buffer = (char *) pc; - rq->flags = REQ_SPECIAL; //rq->cmd = IDEFLOPPY_PC_RQ; + rq->cmd_type = REQ_TYPE_SPECIAL; rq->rq_disk = floppy->disk; (void) ide_do_drive_cmd(drive, rq, ide_preempt); } @@ -1250,7 +1250,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t pc->callback = &idefloppy_rw_callback; pc->rq = rq; pc->b_count = cmd == READ ? 0 : rq->bio->bi_size; - if (rq->flags & REQ_RW) + if (rq->cmd_flags & REQ_RW) set_bit(PC_WRITING, &pc->flags); pc->buffer = NULL; pc->request_transfer = pc->buffer_size = blocks * floppy->block_size; @@ -1281,8 +1281,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request idefloppy_pc_t *pc; unsigned long block = (unsigned long)block_s; - debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n", - rq->rq_status, + debug_log(KERN_INFO "dev: %s, flags: %lx, errors: %d\n", rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->flags, rq->errors); debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, " @@ -1303,7 +1302,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request idefloppy_do_end_request(drive, 0, 0); return ide_stopped; } - if (rq->flags & REQ_CMD) { + if (blk_fs_request(rq)) { if (((long)rq->sector % floppy->bs_factor) || (rq->nr_sectors % floppy->bs_factor)) { printk("%s: unsupported r/w request size\n", @@ -1313,9 +1312,9 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request } pc = idefloppy_next_pc_storage(drive); idefloppy_create_rw_cmd(floppy, pc, rq, block); - } else if (rq->flags & REQ_SPECIAL) { + } else if (blk_special_request(rq)) { pc = (idefloppy_pc_t *) rq->buffer; - } else if (rq->flags & REQ_BLOCK_PC) { + } else if (blk_pc_request(rq)) { pc = idefloppy_next_pc_storage(drive); if (idefloppy_blockpc_cmd(floppy, pc, rq)) { idefloppy_do_end_request(drive, 0, 0); @@ -1343,7 +1342,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc) ide_init_drive_cmd (&rq); rq.buffer = (char *) pc; - rq.flags = REQ_SPECIAL; // rq.cmd = IDEFLOPPY_PC_RQ; + rq.cmd_type = REQ_TYPE_SPECIAL; rq.rq_disk = floppy->disk; return ide_do_drive_cmd(drive, &rq, ide_wait); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index fb6795236e76..38479a29d3e1 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -59,7 +59,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq, { int ret = 1; - BUG_ON(!(rq->flags & REQ_STARTED)); + BUG_ON(!blk_rq_started(rq)); /* * if failfast is set on a request, override number of sectors and @@ -141,7 +141,7 @@ enum { static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; if (drive->media != ide_disk) return; @@ -164,7 +164,7 @@ static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 s static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; ide_task_t *args = rq->special; memset(args, 0, sizeof(*args)); @@ -244,7 +244,7 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq, spin_lock_irqsave(&ide_lock, flags); - BUG_ON(!(rq->flags & REQ_STARTED)); + BUG_ON(!blk_rq_started(rq)); /* * if failfast is set on a request, override number of sectors and @@ -366,7 +366,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) rq = HWGROUP(drive)->rq; spin_unlock_irqrestore(&ide_lock, flags); - if (rq->flags & REQ_DRIVE_CMD) { + if (rq->cmd_type == REQ_TYPE_ATA_CMD) { u8 *args = (u8 *) rq->buffer; if (rq->errors == 0) rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); @@ -376,7 +376,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) args[1] = err; args[2] = hwif->INB(IDE_NSECTOR_REG); } - } else if (rq->flags & REQ_DRIVE_TASK) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) { u8 *args = (u8 *) rq->buffer; if (rq->errors == 0) rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); @@ -390,7 +390,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) args[5] = hwif->INB(IDE_HCYL_REG); args[6] = hwif->INB(IDE_SELECT_REG); } - } else if (rq->flags & REQ_DRIVE_TASKFILE) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *args = (ide_task_t *) rq->special; if (rq->errors == 0) rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); @@ -421,7 +421,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) } } } else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; #ifdef DEBUG_PM printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n", drive->name, rq->pm->pm_step, stat, err); @@ -587,7 +587,7 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat) return ide_stopped; /* retry only "normal" I/O: */ - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) { + if (!blk_fs_request(rq)) { rq->errors = 1; ide_end_drive_cmd(drive, stat, err); return ide_stopped; @@ -638,7 +638,7 @@ ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg) return ide_stopped; /* retry only "normal" I/O: */ - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) { + if (!blk_fs_request(rq)) { rq->errors = 1; ide_end_drive_cmd(drive, BUSY_STAT, 0); return ide_stopped; @@ -808,7 +808,7 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq) if (hwif->sg_mapped) /* needed by ide-scsi */ return; - if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) { + if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) { hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); } else { sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE); @@ -844,7 +844,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) { ide_hwif_t *hwif = HWIF(drive); - if (rq->flags & REQ_DRIVE_TASKFILE) { + if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *args = rq->special; if (!args) @@ -866,7 +866,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, if (args->tf_out_flags.all != 0) return flagged_taskfile(drive, args); return do_rw_taskfile(drive, args); - } else if (rq->flags & REQ_DRIVE_TASK) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASK) { u8 *args = rq->buffer; u8 sel; @@ -892,7 +892,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, hwif->OUTB(sel, IDE_SELECT_REG); ide_cmd(drive, args[0], args[2], &drive_cmd_intr); return ide_started; - } else if (rq->flags & REQ_DRIVE_CMD) { + } else if (rq->cmd_type == REQ_TYPE_ATA_CMD) { u8 *args = rq->buffer; if (!args) @@ -933,7 +933,7 @@ done: static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; if (blk_pm_suspend_request(rq) && pm->pm_step == ide_pm_state_start_suspend) @@ -980,7 +980,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) ide_startstop_t startstop; sector_t block; - BUG_ON(!(rq->flags & REQ_STARTED)); + BUG_ON(!blk_rq_started(rq)); #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", @@ -1013,12 +1013,12 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (!drive->special.all) { ide_driver_t *drv; - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) - return execute_drive_cmd(drive, rq); - else if (rq->flags & REQ_DRIVE_TASKFILE) + if (rq->cmd_type == REQ_TYPE_ATA_CMD || + rq->cmd_type == REQ_TYPE_ATA_TASK || + rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->end_io_data; + struct request_pm_state *pm = rq->data; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, rq->pm->pm_step); @@ -1264,7 +1264,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) * We count how many times we loop here to make sure we service * all drives in the hwgroup without looping for ever */ - if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) { + if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) { drive = drive->next ? drive->next : hwgroup->drive; if (loops++ < 4 && !blk_queue_plugged(drive->queue)) goto again; @@ -1670,7 +1670,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs) void ide_init_drive_cmd (struct request *rq) { memset(rq, 0, sizeof(*rq)); - rq->flags = REQ_DRIVE_CMD; + rq->cmd_type = REQ_TYPE_ATA_CMD; rq->ref_count = 1; } @@ -1710,7 +1710,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio int must_wait = (action == ide_wait || action == ide_head_wait); rq->errors = 0; - rq->rq_status = RQ_ACTIVE; /* * we need to hold an extra reference to request for safe inspection @@ -1718,7 +1717,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio */ if (must_wait) { rq->ref_count++; - rq->waiting = &wait; + rq->end_io_data = &wait; rq->end_io = blk_end_sync_rq; } @@ -1727,7 +1726,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio hwgroup->rq = NULL; if (action == ide_preempt || action == ide_head_wait) { where = ELEVATOR_INSERT_FRONT; - rq->flags |= REQ_PREEMPT; + rq->cmd_flags |= REQ_PREEMPT; } __elv_add_request(drive->queue, rq, where, 0); ide_do_request(hwgroup, IDE_NO_IRQ); @@ -1736,7 +1735,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio err = 0; if (must_wait) { wait_for_completion(&wait); - rq->waiting = NULL; if (rq->errors) err = -EIO; diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 1feff23487d4..850ef63cc986 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -456,13 +456,14 @@ static void ide_dump_opcode(ide_drive_t *drive) spin_unlock(&ide_lock); if (!rq) return; - if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { + if (rq->cmd_type == REQ_TYPE_ATA_CMD || + rq->cmd_type == REQ_TYPE_ATA_TASK) { char *args = rq->buffer; if (args) { opcode = args[0]; found = 1; } - } else if (rq->flags & REQ_DRIVE_TASKFILE) { + } else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *args = rq->special; if (args) { task_struct_t *tf = (task_struct_t *) args->tfRegister; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 7067ab997927..2ebc3760f261 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1776,7 +1776,7 @@ static void idetape_create_request_sense_cmd (idetape_pc_t *pc) static void idetape_init_rq(struct request *rq, u8 cmd) { memset(rq, 0, sizeof(*rq)); - rq->flags = REQ_SPECIAL; + rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[0] = cmd; } @@ -2423,8 +2423,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, #if IDETAPE_DEBUG_LOG #if 0 if (tape->debug_level >= 5) - printk(KERN_INFO "ide-tape: rq_status: %d, " - "dev: %s, cmd: %ld, errors: %d\n", rq->rq_status, + printk(KERN_INFO "ide-tape: %d, " + "dev: %s, cmd: %ld, errors: %d\n", rq->rq_disk->disk_name, rq->cmd[0], rq->errors); #endif if (tape->debug_level >= 2) @@ -2433,12 +2433,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, rq->sector, rq->nr_sectors, rq->current_nr_sectors); #endif /* IDETAPE_DEBUG_LOG */ - if ((rq->flags & REQ_SPECIAL) == 0) { + if (!blk_special_request(rq)) { /* * We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " - "request queue (%ld)\n", drive->name, rq->flags); + "request queue (%d)\n", drive->name, rq->cmd_type); ide_end_request(drive, 0, 0); return ide_stopped; } @@ -2768,12 +2768,12 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq) idetape_tape_t *tape = drive->driver_data; #if IDETAPE_DEBUG_BUGS - if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) { + if (rq == NULL || !blk_special_request(rq)) { printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n"); return; } #endif /* IDETAPE_DEBUG_BUGS */ - rq->waiting = &wait; + rq->end_io_data = &wait; rq->end_io = blk_end_sync_rq; spin_unlock_irq(&tape->spinlock); wait_for_completion(&wait); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 97a9244312fc..1d0470c1f957 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -363,7 +363,7 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq, static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat) { - if (rq->flags & REQ_DRIVE_TASKFILE) { + if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { ide_task_t *task = rq->special; if (task->tf_out_flags.all) { @@ -474,7 +474,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long struct request rq; memset(&rq, 0, sizeof(rq)); - rq.flags = REQ_DRIVE_TASKFILE; + rq.cmd_type = REQ_TYPE_ATA_TASKFILE; rq.buffer = buf; /* @@ -499,7 +499,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors; if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE) - rq.flags |= REQ_RW; + rq.cmd_flags |= REQ_RW; } rq.special = args; @@ -737,7 +737,7 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf) struct request rq; ide_init_drive_cmd(&rq); - rq.flags = REQ_DRIVE_TASK; + rq.cmd_type = REQ_TYPE_ATA_TASK; rq.buffer = buf; return ide_do_drive_cmd(drive, &rq, ide_wait); } diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 9c8468de1a75..2b1a1389c318 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1217,9 +1217,9 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&rq, 0, sizeof(rq)); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); - rq.flags = REQ_PM_SUSPEND; + rq.cmd_type = REQ_TYPE_PM_SUSPEND; rq.special = &args; - rq.end_io_data = &rqpm; + rq.data = &rqpm; rqpm.pm_step = ide_pm_state_start_suspend; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -1238,9 +1238,9 @@ static int generic_ide_resume(struct device *dev) memset(&rq, 0, sizeof(rq)); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); - rq.flags = REQ_PM_RESUME; + rq.cmd_type = REQ_TYPE_PM_RESUME; rq.special = &args; - rq.end_io_data = &rqpm; + rq.data = &rqpm; rqpm.pm_step = ide_pm_state_start_resume; rqpm.pm_state = PM_EVENT_ON; diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c index aebecd8f51cc..4ab931145673 100644 --- a/drivers/ide/legacy/hd.c +++ b/drivers/ide/legacy/hd.c @@ -626,7 +626,7 @@ repeat: req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ", cyl, head, sec, nsect, req->buffer); #endif - if (req->flags & REQ_CMD) { + if (blk_fs_request(req)) { switch (rq_data_dir(req)) { case READ: hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr); diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index bf869ed03eed..6dd31a291d84 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -2,6 +2,8 @@ # Block device driver configuration # +if BLOCK + menu "Multi-device support (RAID and LVM)" config MD @@ -251,3 +253,4 @@ config DM_MULTIPATH_EMC endmenu +endif diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c index 2a374ccb30dd..2b2d45d7baaa 100644 --- a/drivers/md/dm-emc.c +++ b/drivers/md/dm-emc.c @@ -126,7 +126,8 @@ static struct request *get_failover_req(struct emc_handler *h, memset(&rq->cmd, 0, BLK_MAX_CDB); rq->timeout = EMC_FAILOVER_TIMEOUT; - rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE); + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; return rq; } diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig index fef677103880..6443392bffff 100644 --- a/drivers/message/i2o/Kconfig +++ b/drivers/message/i2o/Kconfig @@ -88,7 +88,7 @@ config I2O_BUS config I2O_BLOCK tristate "I2O Block OSM" - depends on I2O + depends on I2O && BLOCK ---help--- Include support for the I2O Block OSM. The Block OSM presents disk and other structured block devices to the operating system. If you diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 1ddc2fb429d5..eaba81bf2eca 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -390,9 +390,9 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req) } /* request is already processed by us, so return */ - if (req->flags & REQ_SPECIAL) { + if (blk_special_request(req)) { osm_debug("REQ_SPECIAL already set!\n"); - req->flags |= REQ_DONTPREP; + req->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; } @@ -411,7 +411,8 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req) ireq = req->special; /* do not come back here */ - req->flags |= REQ_DONTPREP | REQ_SPECIAL; + req->cmd_type = REQ_TYPE_SPECIAL; + req->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; }; diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig index 45bcf098e762..f540bd88dc5a 100644 --- a/drivers/mmc/Kconfig +++ b/drivers/mmc/Kconfig @@ -21,7 +21,7 @@ config MMC_DEBUG config MMC_BLOCK tristate "MMC block device driver" - depends on MMC + depends on MMC && BLOCK default y help Say Y here to enable the MMC block device driver support. diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile index d2957e35cc6f..b1f6e03e7aa9 100644 --- a/drivers/mmc/Makefile +++ b/drivers/mmc/Makefile @@ -24,7 +24,8 @@ obj-$(CONFIG_MMC_AU1X) += au1xmmc.o obj-$(CONFIG_MMC_OMAP) += omap.o obj-$(CONFIG_MMC_AT91RM9200) += at91_mci.o -mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o +mmc_core-y := mmc.o mmc_sysfs.o +mmc_core-$(CONFIG_BLOCK) += mmc_queue.o ifeq ($(CONFIG_MMC_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/drivers/mmc/mmc_queue.c b/drivers/mmc/mmc_queue.c index 74f8cdeeff0f..4ccdd82b680f 100644 --- a/drivers/mmc/mmc_queue.c +++ b/drivers/mmc/mmc_queue.c @@ -28,7 +28,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) struct mmc_queue *mq = q->queuedata; int ret = BLKPREP_KILL; - if (req->flags & REQ_SPECIAL) { + if (blk_special_request(req)) { /* * Special commands already have the command * blocks already setup in req->special. @@ -36,7 +36,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) BUG_ON(!req->special); ret = BLKPREP_OK; - } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { + } else if (blk_fs_request(req) || blk_pc_request(req)) { /* * Block I/O requests need translating according * to the protocol. @@ -50,7 +50,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) } if (ret == BLKPREP_OK) - req->flags |= REQ_DONTPREP; + req->cmd_flags |= REQ_DONTPREP; return ret; } diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index a03e862851db..a304b34c2632 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -166,7 +166,7 @@ config MTD_CHAR config MTD_BLOCK tristate "Caching block device access to MTD devices" - depends on MTD + depends on MTD && BLOCK ---help--- Although most flash chips have an erase size too large to be useful as block devices, it is possible to use MTD devices which are based @@ -188,7 +188,7 @@ config MTD_BLOCK config MTD_BLOCK_RO tristate "Readonly block device access to MTD devices" - depends on MTD_BLOCK!=y && MTD + depends on MTD_BLOCK!=y && MTD && BLOCK help This allows you to mount read-only file systems (such as cramfs) from an MTD device, without the overhead (and danger) of the caching @@ -199,7 +199,7 @@ config MTD_BLOCK_RO config FTL tristate "FTL (Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the original Flash Translation Layer which is part of the PCMCIA specification. It uses a kind of pseudo- @@ -215,7 +215,7 @@ config FTL config NFTL tristate "NFTL (NAND Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the NAND Flash Translation Layer which is used on M-Systems' DiskOnChip devices. It uses a kind of pseudo- @@ -238,7 +238,7 @@ config NFTL_RW config INFTL tristate "INFTL (Inverse NAND Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the Inverse NAND Flash Translation Layer which is used on M-Systems' newer DiskOnChip devices. It @@ -255,7 +255,7 @@ config INFTL config RFD_FTL tristate "Resident Flash Disk (Flash Translation Layer) support" - depends on MTD + depends on MTD && BLOCK ---help--- This provides support for the flash translation layer known as the Resident Flash Disk (RFD), as used by the Embedded BIOS diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 16c02b5ccf7e..440f6851da69 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -136,7 +136,7 @@ config MTDRAM_ABS_POS config MTD_BLOCK2MTD tristate "MTD using block device" - depends on MTD + depends on MTD && BLOCK help This driver allows a block device to appear as an MTD. It would generally be used in the following cases: diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 458d3c8ae1ee..6baf5fe14230 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -46,7 +46,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, nsect = req->current_nr_sectors; buf = req->buffer; - if (!(req->flags & REQ_CMD)) + if (!blk_fs_request(req)) return 0; if (block + nsect > get_capacity(req->rq_disk)) diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 929d6fff6152..b250c5354503 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -1,4 +1,4 @@ -if S390 +if S390 && BLOCK comment "S/390 block device drivers" depends on S390 diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 9d051e5687ea..222a8a71a5e8 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -529,7 +529,7 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req) } cqr->retries = DIAG_MAX_RETRIES; cqr->buildclk = get_clock(); - if (req->flags & REQ_FAILFAST) + if (req->cmd_flags & REQ_FAILFAST) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = DIAG_TIMEOUT; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index b7a7fac3f7c3..5ecea3e4fdef 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1266,7 +1266,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req) recid++; } } - if (req->flags & REQ_FAILFAST) + if (req->cmd_flags & REQ_FAILFAST) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = 5 * 60 * HZ; /* 5 minutes */ diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index e85015be109b..80926c548228 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -344,7 +344,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req) recid++; } } - if (req->flags & REQ_FAILFAST) + if (req->cmd_flags & REQ_FAILFAST) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); cqr->device = device; cqr->expires = 5 * 60 * HZ; /* 5 minutes */ diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index c4dfcc91ddda..dab082002e6f 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -3,11 +3,13 @@ menu "SCSI device support" config RAID_ATTRS tristate "RAID Transport Class" default n + depends on BLOCK ---help--- Provides RAID config SCSI tristate "SCSI device support" + depends on BLOCK ---help--- If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or any other SCSI device under Linux, say Y and make sure that you know diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 5dcef48d414f..10353379a074 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -2862,7 +2862,7 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb) aic_dev->r_total++; ptr = aic_dev->r_bins; } - if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER) + if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER) { aic_dev->barrier_total++; if(scb->tag_action == MSG_ORDERED_Q_TAG) @@ -10158,7 +10158,7 @@ aic7xxx_buildscb(struct aic7xxx_host *p, Scsi_Cmnd *cmd, /* We always force TEST_UNIT_READY to untagged */ if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags) { - if (req->flags & REQ_HARDBARRIER) + if (req->cmd_flags & REQ_HARDBARRIER) { if(sdptr->ordered_tags) { diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 94d1de55607f..1427a41e8441 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -344,7 +344,7 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co pc->buffer = buf; pc->c[0] = REQUEST_SENSE; pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE; - rq->flags = REQ_SENSE; + rq->cmd_type = REQ_TYPE_SENSE; pc->timeout = jiffies + WAIT_READY; /* NOTE! Save the failed packet command in "rq->buffer" */ rq->buffer = (void *) failed_command->special; @@ -398,12 +398,12 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs) int errors = rq->errors; unsigned long flags; - if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) { + if (!blk_special_request(rq) && !blk_sense_request(rq)) { ide_end_request(drive, uptodate, nrsecs); return 0; } ide_end_drive_cmd (drive, 0, 0); - if (rq->flags & REQ_SENSE) { + if (blk_sense_request(rq)) { idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer; if (log) { printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number); @@ -708,11 +708,11 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc) static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block) { #if IDESCSI_DEBUG_LOG - printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors); + printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors); printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); #endif /* IDESCSI_DEBUG_LOG */ - if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) { + if (blk_sense_request(rq) || blk_special_request(rq)) { return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special); } blk_dump_rq_flags(rq, "ide-scsi: unsup command"); @@ -938,7 +938,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd, ide_init_drive_cmd (rq); rq->special = (char *) pc; - rq->flags = REQ_SPECIAL; + rq->cmd_type = REQ_TYPE_SPECIAL; spin_unlock_irq(host->host_lock); rq->rq_disk = scsi->disk; (void) ide_do_drive_cmd (drive, rq, ide_end); @@ -992,7 +992,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd) */ printk (KERN_ERR "ide-scsi: cmd aborted!\n"); - if (scsi->pc->rq->flags & REQ_SENSE) + if (blk_sense_request(scsi->pc->rq)) kfree(scsi->pc->buffer); kfree(scsi->pc->rq); kfree(scsi->pc); @@ -1042,7 +1042,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd) /* kill current request */ blkdev_dequeue_request(req); end_that_request_last(req, 0); - if (req->flags & REQ_SENSE) + if (blk_sense_request(req)) kfree(scsi->pc->buffer); kfree(scsi->pc); scsi->pc = NULL; diff --git a/drivers/scsi/pluto.c b/drivers/scsi/pluto.c index 0bd9c60e6455..aa60a5f1fbc3 100644 --- a/drivers/scsi/pluto.c +++ b/drivers/scsi/pluto.c @@ -67,7 +67,6 @@ static void __init pluto_detect_done(Scsi_Cmnd *SCpnt) static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt) { - SCpnt->request->rq_status = RQ_SCSI_DONE; PLND(("Detect done %08lx\n", (long)SCpnt)) if (atomic_dec_and_test (&fcss)) up(&fc_sem); @@ -166,7 +165,7 @@ int __init pluto_detect(struct scsi_host_template *tpnt) SCpnt->cmd_len = COMMAND_SIZE(INQUIRY); - SCpnt->request->rq_status = RQ_SCSI_BUSY; + SCpnt->request->cmd_flags &= ~REQ_STARTED; SCpnt->done = pluto_detect_done; SCpnt->request_bufflen = 256; @@ -178,7 +177,8 @@ int __init pluto_detect(struct scsi_host_template *tpnt) for (retry = 0; retry < 5; retry++) { for (i = 0; i < fcscount; i++) { if (!fcs[i].fc) break; - if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) { + if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) { + fcs[i].cmd.request->cmd_flags |= REQ_STARTED; disable_irq(fcs[i].fc->irq); PLND(("queuecommand %d %d\n", retry, i)) fcp_scsi_queuecommand (&(fcs[i].cmd), diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 7a054f9d1ee3..da95bce907dd 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -592,12 +592,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return rtn; } - -/* - * Per-CPU I/O completion queue. - */ -static DEFINE_PER_CPU(struct list_head, scsi_done_q); - /** * scsi_req_abort_cmd -- Request command recovery for the specified command * cmd: pointer to the SCSI command of interest @@ -1065,7 +1059,7 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery) spin_lock_irqsave(&sdev->list_lock, flags); list_for_each_entry(scmd, &sdev->cmd_list, list) { - if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) { + if (scmd->request) { /* * If we are unable to remove the timer, it means * that the command has already timed out or @@ -1102,7 +1096,7 @@ MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels"); static int __init init_scsi(void) { - int error, i; + int error; error = scsi_init_queue(); if (error) @@ -1123,9 +1117,6 @@ static int __init init_scsi(void) if (error) goto cleanup_sysctl; - for_each_possible_cpu(i) - INIT_LIST_HEAD(&per_cpu(scsi_done_q, i)); - scsi_netlink_init(); printk(KERN_NOTICE "SCSI subsystem initialized\n"); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d6743b959a72..71084728eb42 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -82,7 +82,7 @@ static void scsi_unprep_request(struct request *req) { struct scsi_cmnd *cmd = req->special; - req->flags &= ~REQ_DONTPREP; + req->cmd_flags &= ~REQ_DONTPREP; req->special = NULL; scsi_put_command(cmd); @@ -196,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req->sense_len = 0; req->retries = retries; req->timeout = timeout; - req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT; /* * head injection *required* here otherwise quiesce won't work @@ -397,7 +398,8 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, req = blk_get_request(sdev->request_queue, write, gfp); if (!req) goto free_sense; - req->flags |= REQ_BLOCK_PC | REQ_QUIET; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= REQ_QUIET; if (use_sg) err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp); @@ -933,7 +935,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) break; } } - if (!(req->flags & REQ_QUIET)) { + if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, "Device not ready: "); scsi_print_sense_hdr("", &sshdr); @@ -941,7 +943,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_end_request(cmd, 0, this_count, 1); return; case VOLUME_OVERFLOW: - if (!(req->flags & REQ_QUIET)) { + if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, "Volume overflow, CDB: "); __scsi_print_command(cmd->cmnd); @@ -963,7 +965,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) return; } if (result) { - if (!(req->flags & REQ_QUIET)) { + if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, "SCSI error: return code = 0x%08x\n", result); @@ -995,7 +997,7 @@ static int scsi_init_io(struct scsi_cmnd *cmd) /* * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer */ - if ((req->flags & REQ_BLOCK_PC) && !req->bio) { + if (blk_pc_request(req) && !req->bio) { cmd->request_bufflen = req->data_len; cmd->request_buffer = req->data; req->buffer = req->data; @@ -1139,13 +1141,12 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) * these two cases differently. We differentiate by looking * at request->cmd, as this tells us the real story. */ - if (req->flags & REQ_SPECIAL && req->special) { + if (blk_special_request(req) && req->special) cmd = req->special; - } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { - - if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) { - if(specials_only == SDEV_QUIESCE || - specials_only == SDEV_BLOCK) + else if (blk_pc_request(req) || blk_fs_request(req)) { + if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){ + if (specials_only == SDEV_QUIESCE || + specials_only == SDEV_BLOCK) goto defer; sdev_printk(KERN_ERR, sdev, @@ -1153,7 +1154,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) goto kill; } - /* * Now try and find a command block that we can use. */ @@ -1184,7 +1184,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) * lock. We hope REQ_STARTED prevents anything untoward from * happening now. */ - if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { + if (blk_fs_request(req) || blk_pc_request(req)) { int ret; /* @@ -1216,7 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) /* * Initialize the actual SCSI command for this request. */ - if (req->flags & REQ_BLOCK_PC) { + if (blk_pc_request(req)) { scsi_setup_blk_pc_cmnd(cmd); } else if (req->rq_disk) { struct scsi_driver *drv; @@ -1233,7 +1233,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) /* * The request is now prepped, no need to come back here */ - req->flags |= REQ_DONTPREP; + req->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; defer: @@ -1454,8 +1454,9 @@ static void scsi_request_fn(struct request_queue *q) if (unlikely(cmd == NULL)) { printk(KERN_CRIT "impossible request in %s.\n" "please mail a stack trace to " - "linux-scsi@vger.kernel.org", + "linux-scsi@vger.kernel.org\n", __FUNCTION__); + blk_dump_rq_flags(req, "foo"); BUG(); } spin_lock(shost->host_lock); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 638cff41d436..10bc99c911fa 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -443,8 +443,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt) SCpnt->cmnd[0] = READ_6; SCpnt->sc_data_direction = DMA_FROM_DEVICE; } else { - printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags); -/* overkill panic("Unknown sd command %lx\n", rq->flags); */ + printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags); return 0; } @@ -840,7 +839,7 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector) static void sd_prepare_flush(request_queue_t *q, struct request *rq) { memset(rq->cmd, 0, sizeof(rq->cmd)); - rq->flags |= REQ_BLOCK_PC; + rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->timeout = SD_TIMEOUT; rq->cmd[0] = SYNCHRONIZE_CACHE; rq->cmd_len = 10; diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c index 2f8073b73bf3..7f9bcef6adfa 100644 --- a/drivers/scsi/sun3_NCR5380.c +++ b/drivers/scsi/sun3_NCR5380.c @@ -2017,7 +2017,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance) if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done != cmd)) { - if(cmd->request->flags & REQ_CMD) { + if(blk_fs_request(cmd->request)) { sun3scsi_dma_setup(d, count, rq_data_dir(cmd->request)); sun3_dma_setup_done = cmd; diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c index 837173415d4c..44a99aeb8180 100644 --- a/drivers/scsi/sun3_scsi.c +++ b/drivers/scsi/sun3_scsi.c @@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance) static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd, int write_flag) { - if(cmd->request->flags & REQ_CMD) + if(blk_fs_request(cmd->request)) return wanted; else return 0; diff --git a/drivers/scsi/sun3_scsi_vme.c b/drivers/scsi/sun3_scsi_vme.c index 008a82ab8521..f5742b84b27a 100644 --- a/drivers/scsi/sun3_scsi_vme.c +++ b/drivers/scsi/sun3_scsi_vme.c @@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance) static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd, int write_flag) { - if(cmd->request->flags & REQ_CMD) + if(blk_fs_request(cmd->request)) return wanted; else return 0; diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig index 86e48c42d6af..422a4b288e34 100644 --- a/drivers/usb/storage/Kconfig +++ b/drivers/usb/storage/Kconfig @@ -8,8 +8,7 @@ comment "may also be needed; see USB_STORAGE Help for more information" config USB_STORAGE tristate "USB Mass Storage support" - depends on USB - select SCSI + depends on USB && SCSI ---help--- Say Y here if you want to connect USB mass storage devices to your computer's USB port. This is the driver you need for USB @@ -18,7 +17,7 @@ config USB_STORAGE similar devices. This driver may also be used for some cameras and card readers. - This option 'selects' (turns on, enables) 'SCSI', but you + This option depends on 'SCSI' support being enabled, but you probably also need 'SCSI device support: SCSI disk support' (BLK_DEV_SD) for most USB storage devices. diff --git a/fs/Kconfig b/fs/Kconfig index 4fd9efac29ab..1453d2d164f7 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -4,6 +4,8 @@ menu "File systems" +if BLOCK + config EXT2_FS tristate "Second extended fs support" help @@ -399,6 +401,8 @@ config ROMFS_FS If you don't know whether you need it, then you don't need it: answer N. +endif + config INOTIFY bool "Inotify file change notification support" default y @@ -530,6 +534,7 @@ config FUSE_FS If you want to develop a userspace FS, or if you want to use a filesystem based on FUSE, answer Y or M. +if BLOCK menu "CD-ROM/DVD Filesystems" config ISO9660_FS @@ -597,7 +602,9 @@ config UDF_NLS depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y) endmenu +endif +if BLOCK menu "DOS/FAT/NT Filesystems" config FAT_FS @@ -782,6 +789,7 @@ config NTFS_RW It is perfectly safe to say N here. endmenu +endif menu "Pseudo filesystems" @@ -939,7 +947,7 @@ menu "Miscellaneous filesystems" config ADFS_FS tristate "ADFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help The Acorn Disc Filing System is the standard file system of the RiscOS operating system which runs on Acorn's ARM-based Risc PC @@ -967,7 +975,7 @@ config ADFS_FS_RW config AFFS_FS tristate "Amiga FFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help The Fast File System (FFS) is the common file system used on hard disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y @@ -989,7 +997,7 @@ config AFFS_FS config HFS_FS tristate "Apple Macintosh file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL select NLS help If you say Y here, you will be able to mount Macintosh-formatted @@ -1002,6 +1010,7 @@ config HFS_FS config HFSPLUS_FS tristate "Apple Extended HFS file system support" + depends on BLOCK select NLS select NLS_UTF8 help @@ -1015,7 +1024,7 @@ config HFSPLUS_FS config BEFS_FS tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL select NLS help The BeOS File System (BeFS) is the native file system of Be, Inc's @@ -1042,7 +1051,7 @@ config BEFS_DEBUG config BFS_FS tristate "BFS file system support (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help Boot File System (BFS) is a file system used under SCO UnixWare to allow the bootloader access to the kernel image and other important @@ -1064,7 +1073,7 @@ config BFS_FS config EFS_FS tristate "EFS file system support (read only) (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on BLOCK && EXPERIMENTAL help EFS is an older file system used for non-ISO9660 CD-ROMs and hard disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer @@ -1079,7 +1088,7 @@ config EFS_FS config JFFS_FS tristate "Journalling Flash File System (JFFS) support" - depends on MTD + depends on MTD && BLOCK help JFFS is the Journaling Flash File System developed by Axis Communications in Sweden, aimed at providing a crash/powerdown-safe @@ -1264,6 +1273,7 @@ endchoice config CRAMFS tristate "Compressed ROM file system support (cramfs)" + depends on BLOCK select ZLIB_INFLATE help Saying Y here includes support for CramFs (Compressed ROM File @@ -1283,6 +1293,7 @@ config CRAMFS config VXFS_FS tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" + depends on BLOCK help FreeVxFS is a file system driver that support the VERITAS VxFS(TM) file system format. VERITAS VxFS(TM) is the standard file system @@ -1300,6 +1311,7 @@ config VXFS_FS config HPFS_FS tristate "OS/2 HPFS file system support" + depends on BLOCK help OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS is the file system used for organizing files on OS/2 hard disk @@ -1316,6 +1328,7 @@ config HPFS_FS config QNX4FS_FS tristate "QNX4 file system support (read only)" + depends on BLOCK help This is the file system used by the real-time operating systems QNX 4 and QNX 6 (the latter is also called QNX RTP). @@ -1343,6 +1356,7 @@ config QNX4FS_RW config SYSV_FS tristate "System V/Xenix/V7/Coherent file system support" + depends on BLOCK help SCO, Xenix and Coherent are commercial Unix systems for Intel machines, and Version 7 was used on the DEC PDP-11. Saying Y @@ -1381,6 +1395,7 @@ config SYSV_FS config UFS_FS tristate "UFS file system support (read only)" + depends on BLOCK help BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, OpenBSD and NeXTstep) use a file system called UFS. Some System V @@ -1959,11 +1974,13 @@ config GENERIC_ACL endmenu +if BLOCK menu "Partition Types" source "fs/partitions/Kconfig" endmenu +endif source "fs/nls/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 46b8cfe497b2..a503e6ce0f32 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -5,12 +5,18 @@ # Rewritten to use lists instead of if-statements. # -obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ - block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ +obj-y := open.o read_write.o file_table.o super.o \ + char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ - seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o drop_caches.o splice.o sync.o + seq_file.o xattr.o libfs.o fs-writeback.o \ + pnode.o drop_caches.o splice.o sync.o + +ifeq ($(CONFIG_BLOCK),y) +obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o +else +obj-y += no-block.o +endif obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_INOTIFY_USER) += inotify_user.o diff --git a/fs/afs/file.c b/fs/afs/file.c index 67d6634101fd..2e8c42639eaa 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -16,7 +16,6 @@ #include #include #include -#include #include "volume.h" #include "vnode.h" #include @@ -37,7 +36,6 @@ struct inode_operations afs_file_inode_operations = { const struct address_space_operations afs_fs_aops = { .readpage = afs_file_readpage, - .sync_page = block_sync_page, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = afs_file_releasepage, .invalidatepage = afs_file_invalidatepage, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6eb48e1446ec..bad52433de69 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -46,7 +46,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); -extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); #ifndef elf_addr_t #define elf_addr_t unsigned long diff --git a/fs/bio.c b/fs/bio.c index 6a0b9ad8f8c9..8f93e939f213 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001 Jens Axboe + * Copyright (C) 2001 Jens Axboe * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1142,7 +1142,7 @@ static int biovec_create_pools(struct bio_set *bs, int pool_entries, int scale) struct biovec_slab *bp = bvec_slabs + i; mempool_t **bvp = bs->bvec_pools + i; - if (i >= scale) + if (pool_entries > 1 && i >= scale) pool_entries >>= 1; *bvp = mempool_create_slab_pool(pool_entries, bp->slab); diff --git a/fs/block_dev.c b/fs/block_dev.c index 4346468139e8..0c361ea7e5a6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -17,11 +17,13 @@ #include #include #include +#include #include #include #include #include #include +#include "internal.h" struct bdev_inode { struct block_device bdev; @@ -1313,3 +1315,24 @@ void close_bdev_excl(struct block_device *bdev) } EXPORT_SYMBOL(close_bdev_excl); + +int __invalidate_device(struct block_device *bdev) +{ + struct super_block *sb = get_super(bdev); + int res = 0; + + if (sb) { + /* + * no need to lock the super, get_super holds the + * read mutex so the filesystem cannot go away + * under us (->put_super runs with the write lock + * hold). + */ + shrink_dcache_sb(sb); + res = invalidate_inodes(sb); + drop_super(sb); + } + invalidate_bdev(bdev, 0); + return res; +} +EXPORT_SYMBOL(__invalidate_device); diff --git a/fs/buffer.c b/fs/buffer.c index 3b6d701073e7..16cfbcd254f1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -159,31 +159,6 @@ int sync_blockdev(struct block_device *bdev) } EXPORT_SYMBOL(sync_blockdev); -static void __fsync_super(struct super_block *sb) -{ - sync_inodes_sb(sb, 0); - DQUOT_SYNC(sb); - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); -} - -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) -{ - __fsync_super(sb); - return sync_blockdev(sb->s_bdev); -} - /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block @@ -259,118 +234,6 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb) } EXPORT_SYMBOL(thaw_bdev); -/* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. - */ -static void do_sync(unsigned long wait) -{ - wakeup_pdflush(0); - sync_inodes(0); /* All mappings, inodes and their blockdevs */ - DQUOT_SYNC(NULL); - sync_supers(); /* Write the superblocks */ - sync_filesystems(0); /* Start syncing the filesystems */ - sync_filesystems(wait); /* Waitingly sync the filesystems */ - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ - if (!wait) - printk("Emergency Sync complete\n"); - if (unlikely(laptop_mode)) - laptop_sync_completion(); -} - -asmlinkage long sys_sync(void) -{ - do_sync(1); - return 0; -} - -void emergency_sync(void) -{ - pdflush_operation(do_sync, 0); -} - -/* - * Generic function to fsync a file. - * - * filp may be NULL if called via the msync of a vma. - */ - -int file_fsync(struct file *filp, struct dentry *dentry, int datasync) -{ - struct inode * inode = dentry->d_inode; - struct super_block * sb; - int ret, err; - - /* sync the inode to buffers */ - ret = write_inode_now(inode, 0); - - /* sync the superblock to buffers */ - sb = inode->i_sb; - lock_super(sb); - if (sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - - /* .. finally sync the buffers to disk */ - err = sync_blockdev(sb->s_bdev); - if (!ret) - ret = err; - return ret; -} - -long do_fsync(struct file *file, int datasync) -{ - int ret; - int err; - struct address_space *mapping = file->f_mapping; - - if (!file->f_op || !file->f_op->fsync) { - /* Why? We can still call filemap_fdatawrite */ - ret = -EINVAL; - goto out; - } - - ret = filemap_fdatawrite(mapping); - - /* - * We need to protect against concurrent writers, which could cause - * livelocks in fsync_buffers_list(). - */ - mutex_lock(&mapping->host->i_mutex); - err = file->f_op->fsync(file, file->f_dentry, datasync); - if (!ret) - ret = err; - mutex_unlock(&mapping->host->i_mutex); - err = filemap_fdatawait(mapping); - if (!ret) - ret = err; -out: - return ret; -} - -static long __do_fsync(unsigned int fd, int datasync) -{ - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if (file) { - ret = do_fsync(file, datasync); - fput(file); - } - return ret; -} - -asmlinkage long sys_fsync(unsigned int fd) -{ - return __do_fsync(fd, 0); -} - -asmlinkage long sys_fdatasync(unsigned int fd) -{ - return __do_fsync(fd, 1); -} - /* * Various filesystems appear to want __find_get_block to be non-blocking. * But it's the page lock which protects the buffers. To get around this, @@ -1550,35 +1413,6 @@ static void discard_buffer(struct buffer_head * bh) unlock_buffer(bh); } -/** - * try_to_release_page() - release old fs-specific metadata on a page - * - * @page: the page which the kernel is trying to free - * @gfp_mask: memory allocation flags (and I/O mode) - * - * The address_space is to try to release any data against the page - * (presumably at page->private). If the release was successful, return `1'. - * Otherwise return zero. - * - * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). - * - * NOTE: @gfp_mask may go away, and this function may become non-blocking. - */ -int try_to_release_page(struct page *page, gfp_t gfp_mask) -{ - struct address_space * const mapping = page->mapping; - - BUG_ON(!PageLocked(page)); - if (PageWriteback(page)) - return 0; - - if (mapping && mapping->a_ops->releasepage) - return mapping->a_ops->releasepage(page, gfp_mask); - return try_to_free_buffers(page); -} -EXPORT_SYMBOL(try_to_release_page); - /** * block_invalidatepage - invalidate part of all of a buffer-backed page * @@ -1630,14 +1464,6 @@ out: } EXPORT_SYMBOL(block_invalidatepage); -void do_invalidatepage(struct page *page, unsigned long offset) -{ - void (*invalidatepage)(struct page *, unsigned long); - invalidatepage = page->mapping->a_ops->invalidatepage ? : - block_invalidatepage; - (*invalidatepage)(page, offset); -} - /* * We attach and possibly dirty the buffers atomically wrt * __set_page_dirty_buffers() via private_lock. try_to_free_buffers diff --git a/fs/char_dev.c b/fs/char_dev.c index 1f3285affa39..a885f46ca001 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -24,6 +24,7 @@ #ifdef CONFIG_KMOD #include #endif +#include "internal.h" /* * capabilities for /dev/mem, /dev/kmem and similar directly mappable character diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ddb012a68023..976a691c5a68 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b88147c1dc27..05f874c7441b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -19,7 +19,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -#include #include #include #include diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index b0ea6687ab55..e34c7db00f6f 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -22,7 +22,6 @@ */ #include -#include #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -74,7 +73,7 @@ int cifs_ioctl (struct inode * inode, struct file * filep, } break; #ifdef CONFIG_CIFS_POSIX - case EXT2_IOC_GETFLAGS: + case FS_IOC_GETFLAGS: if(CIFS_UNIX_EXTATTR_CAP & caps) { if (pSMBFile == NULL) break; @@ -82,12 +81,12 @@ int cifs_ioctl (struct inode * inode, struct file * filep, &ExtAttrBits, &ExtAttrMask); if(rc == 0) rc = put_user(ExtAttrBits & - EXT2_FL_USER_VISIBLE, + FS_FL_USER_VISIBLE, (int __user *)arg); } break; - case EXT2_IOC_SETFLAGS: + case FS_IOC_SETFLAGS: if(CIFS_UNIX_EXTATTR_CAP & caps) { if(get_user(ExtAttrBits,(int __user *)arg)) { rc = -EFAULT; diff --git a/fs/compat.c b/fs/compat.c index ce982f6e8c80..122b4e3992b5 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -52,11 +52,12 @@ #include #include #include - -extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); +#include "internal.h" int compat_log = 1; +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + int compat_printk(const char *fmt, ...) { va_list ap; @@ -313,9 +314,6 @@ out: #define IOCTL_HASHSIZE 256 static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; -extern struct ioctl_trans ioctl_start[]; -extern int ioctl_table_size; - static inline unsigned long ioctl32_hash(unsigned long cmd) { return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; @@ -838,8 +836,6 @@ static int do_nfs4_super_data_conv(void *raw_data) return 0; } -extern int copy_mount_options (const void __user *, unsigned long *); - #define SMBFS_NAME "smbfs" #define NCPFS_NAME "ncpfs" #define NFS4_NAME "nfs4" diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 4063a9396977..64b34533edea 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -40,15 +40,11 @@ #include #include #include -#include #include #include #include #include #include -#include -#include -#include #include #include #include @@ -60,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -113,7 +108,6 @@ #include #include #include -#include #include #include @@ -124,21 +118,6 @@ #include #include -/* Aiee. Someone does not find a difference between int and long */ -#define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) -#define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) -#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) -#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) -#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) -#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) -#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD_DEBUG -#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) -#endif - -#define EXT2_IOC32_GETVERSION _IOR('v', 1, int) -#define EXT2_IOC32_SETVERSION _IOW('v', 2, int) - static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *f) { @@ -176,34 +155,6 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg) return err; } -static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break; - case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break; - case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break; - case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break; - } - return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); -} - -static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break; - case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break; - case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break; - case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break; - case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break; -#ifdef CONFIG_JBD_DEBUG - case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break; -#endif - } - return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); -} - struct compat_video_event { int32_t type; compat_time_t timestamp; @@ -694,6 +645,7 @@ out: } #endif +#ifdef CONFIG_BLOCK struct hd_geometry32 { unsigned char heads; unsigned char sectors; @@ -918,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg) } return err; } +#endif /* CONFIG_BLOCK */ struct sock_fprog32 { unsigned short len; @@ -1041,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) } +#ifdef CONFIG_BLOCK struct mtget32 { compat_long_t mt_type; compat_long_t mt_resid; @@ -1213,73 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar return err; } - -struct loop_info32 { - compat_int_t lo_number; /* ioctl r/o */ - compat_dev_t lo_device; /* ioctl r/o */ - compat_ulong_t lo_inode; /* ioctl r/o */ - compat_dev_t lo_rdevice; /* ioctl r/o */ - compat_int_t lo_offset; - compat_int_t lo_encrypt_type; - compat_int_t lo_encrypt_key_size; /* ioctl w/o */ - compat_int_t lo_flags; /* ioctl r/o */ - char lo_name[LO_NAME_SIZE]; - unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ - compat_ulong_t lo_init[2]; - char reserved[4]; -}; - -static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - mm_segment_t old_fs = get_fs(); - struct loop_info l; - struct loop_info32 __user *ul; - int err = -EINVAL; - - ul = compat_ptr(arg); - switch(cmd) { - case LOOP_SET_STATUS: - err = get_user(l.lo_number, &ul->lo_number); - err |= __get_user(l.lo_device, &ul->lo_device); - err |= __get_user(l.lo_inode, &ul->lo_inode); - err |= __get_user(l.lo_rdevice, &ul->lo_rdevice); - err |= __copy_from_user(&l.lo_offset, &ul->lo_offset, - 8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); - if (err) { - err = -EFAULT; - } else { - set_fs (KERNEL_DS); - err = sys_ioctl (fd, cmd, (unsigned long)&l); - set_fs (old_fs); - } - break; - case LOOP_GET_STATUS: - set_fs (KERNEL_DS); - err = sys_ioctl (fd, cmd, (unsigned long)&l); - set_fs (old_fs); - if (!err) { - err = put_user(l.lo_number, &ul->lo_number); - err |= __put_user(l.lo_device, &ul->lo_device); - err |= __put_user(l.lo_inode, &ul->lo_inode); - err |= __put_user(l.lo_rdevice, &ul->lo_rdevice); - err |= __copy_to_user(&ul->lo_offset, &l.lo_offset, - (unsigned long)l.lo_init - (unsigned long)&l.lo_offset); - if (err) - err = -EFAULT; - } - break; - default: { - static int count; - if (++count <= 20) - printk("%s: Unknown loop ioctl cmd, fd(%d) " - "cmd(%08x) arg(%08lx)\n", - __FUNCTION__, fd, cmd, arg); - } - } - return err; -} - -extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg); +#endif /* CONFIG_BLOCK */ #ifdef CONFIG_VT @@ -1607,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) return -EINVAL; } +#ifdef CONFIG_BLOCK static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg) { /* The mkswap binary hard codes it to Intel value :-((( */ @@ -1641,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar return sys_ioctl(fd, cmd, (unsigned long)a); } +#endif static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) { return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg); } +#ifdef CONFIG_BLOCK /* Fix sizeof(sizeof()) breakage */ #define BLKBSZGET_32 _IOR(0x12,112,int) #define BLKBSZSET_32 _IOW(0x12,113,int) @@ -1667,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd, { return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg)); } +#endif /* Bluetooth ioctls */ #define HCIUARTSETPROTO _IOW('U', 200, int) @@ -1687,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd, #define HIDPGETCONNLIST _IOR('H', 210, int) #define HIDPGETCONNINFO _IOR('H', 211, int) +#ifdef CONFIG_BLOCK struct floppy_struct32 { compat_uint_t size; compat_uint_t sect; @@ -2011,6 +1904,7 @@ out: kfree(karg); return err; } +#endif struct mtd_oob_buf32 { u_int32_t start; @@ -2052,61 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) return err; } -#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2]) -#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2]) - -static long -put_dirent32 (struct dirent *d, struct compat_dirent __user *d32) -{ - if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent))) - return -EFAULT; - - __put_user(d->d_ino, &d32->d_ino); - __put_user(d->d_off, &d32->d_off); - __put_user(d->d_reclen, &d32->d_reclen); - if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen)) - return -EFAULT; - - return 0; -} - -static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg) -{ - struct compat_dirent __user *p = compat_ptr(arg); - int ret; - mm_segment_t oldfs = get_fs(); - struct dirent d[2]; - - switch(cmd) - { - case VFAT_IOCTL_READDIR_BOTH32: - cmd = VFAT_IOCTL_READDIR_BOTH; - break; - case VFAT_IOCTL_READDIR_SHORT32: - cmd = VFAT_IOCTL_READDIR_SHORT; - break; - } - - set_fs(KERNEL_DS); - ret = sys_ioctl(fd,cmd,(unsigned long)&d); - set_fs(oldfs); - if (ret >= 0) { - ret |= put_dirent32(&d[0], p); - ret |= put_dirent32(&d[1], p + 1); - } - return ret; -} - -#define REISERFS_IOC_UNPACK32 _IOW(0xCD,1,int) - -static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr) -{ - if (cmd == REISERFS_IOC_UNPACK32) - cmd = REISERFS_IOC_UNPACK; - - return sys_ioctl(fd,cmd,ptr); -} - +#ifdef CONFIG_BLOCK struct raw32_config_request { compat_int_t raw_minor; @@ -2171,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg) } return ret; } +#endif /* CONFIG_BLOCK */ struct serial_struct32 { compat_int_t type; @@ -2777,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc) HANDLE_IOCTL(SIOCRTMSG, ret_einval) HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) #endif +#ifdef CONFIG_BLOCK HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) HANDLE_IOCTL(BLKRAGET, w_long) HANDLE_IOCTL(BLKGETSIZE, w_long) @@ -2802,16 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans) HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans) HANDLE_IOCTL(SG_IO,sg_ioctl_trans) HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans) +#endif HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans) HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans) HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans) HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans) +#ifdef CONFIG_BLOCK HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans) HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans) HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans) HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans) -HANDLE_IOCTL(LOOP_SET_STATUS, loop_status) -HANDLE_IOCTL(LOOP_GET_STATUS, loop_status) +#endif #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout) #ifdef CONFIG_VT @@ -2821,19 +2664,6 @@ HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl) HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl) HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl) #endif -HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl) -HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl) -HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl) -HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl) -HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl) -COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD) -#ifdef CONFIG_JBD_DEBUG -HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl) -#endif /* One SMB ioctl needs translations. */ #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid) @@ -2863,16 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) /* block stuff */ +#ifdef CONFIG_BLOCK HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget) HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset) HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64) -/* vfat */ -HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32) -HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32) -HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32) /* Raw devices */ HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) +#endif /* Serial */ HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl) HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl) diff --git a/fs/dcache.c b/fs/dcache.c index 17b392a2049e..fc2faa44f8d1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; @@ -1877,9 +1878,6 @@ kmem_cache_t *filp_cachep __read_mostly; EXPORT_SYMBOL(d_genocide); -extern void bdev_cache_init(void); -extern void chrdev_init(void); - void __init vfs_caches_init_early(void) { dcache_init_early(); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 92ea8265d7d5..3e7a84a1e509 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -661,5 +661,8 @@ const struct file_operations ext2_dir_operations = { .read = generic_read_dir, .readdir = ext2_readdir, .ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .fsync = ext2_sync_file, }; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index e65a019fc7a5..c19ac153f56b 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -137,6 +137,7 @@ extern void ext2_set_inode_flags(struct inode *inode); /* ioctl.c */ extern int ext2_ioctl (struct inode *, struct file *, unsigned int, unsigned long); +extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ struct dentry *ext2_get_parent(struct dentry *child); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 23e2c7ccec1d..e8bbed9dd268 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -46,6 +46,9 @@ const struct file_operations ext2_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .mmap = generic_file_mmap, .open = generic_file_open, .release = ext2_release_file, @@ -63,6 +66,9 @@ const struct file_operations ext2_xip_file_operations = { .read = xip_file_read, .write = xip_file_write, .ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .mmap = xip_file_mmap, .open = generic_file_open, .release = ext2_release_file, diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 3ca9afdf713d..1dfba77eab10 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -80,3 +82,33 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT2_IOC32_GETFLAGS: + cmd = EXT2_IOC_GETFLAGS; + break; + case EXT2_IOC32_SETFLAGS: + cmd = EXT2_IOC_SETFLAGS; + break; + case EXT2_IOC32_GETVERSION: + cmd = EXT2_IOC_GETVERSION; + break; + case EXT2_IOC32_SETVERSION: + cmd = EXT2_IOC_SETVERSION; + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 429acbb4e064..d0b54f30b914 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -44,6 +44,9 @@ const struct file_operations ext3_dir_operations = { .read = generic_read_dir, .readdir = ext3_readdir, /* we take BKL. needed?*/ .ioctl = ext3_ioctl, /* BKL held */ +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif .fsync = ext3_sync_file, /* BKL held */ #ifdef CONFIG_EXT3_INDEX .release = ext3_release_dir, diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 994efd189f4e..74ff20f9d09b 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -114,6 +114,9 @@ const struct file_operations ext3_file_operations = { .readv = generic_file_readv, .writev = generic_file_writev, .ioctl = ext3_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif .mmap = generic_file_mmap, .open = generic_file_open, .release = ext3_release_file, diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index dcf4f1dd108b..03ba5bcab186 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -1073,7 +1074,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ, 1, &bh); + ll_rw_block(READ_META, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2540,7 +2541,7 @@ make_io: */ get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); + submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 3a6b012d120c..12daa6869572 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -13,9 +13,10 @@ #include #include #include +#include +#include #include - int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) { @@ -252,3 +253,55 @@ flags_err: return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT3_IOC32_GETFLAGS: + cmd = EXT3_IOC_GETFLAGS; + break; + case EXT3_IOC32_SETFLAGS: + cmd = EXT3_IOC_SETFLAGS; + break; + case EXT3_IOC32_GETVERSION: + cmd = EXT3_IOC_GETVERSION; + break; + case EXT3_IOC32_SETVERSION: + cmd = EXT3_IOC_SETVERSION; + break; + case EXT3_IOC32_GROUP_EXTEND: + cmd = EXT3_IOC_GROUP_EXTEND; + break; + case EXT3_IOC32_GETVERSION_OLD: + cmd = EXT3_IOC_GETVERSION_OLD; + break; + case EXT3_IOC32_SETVERSION_OLD: + cmd = EXT3_IOC_SETVERSION_OLD; + break; +#ifdef CONFIG_JBD_DEBUG + case EXT3_IOC32_WAIT_FOR_READONLY: + cmd = EXT3_IOC_WAIT_FOR_READONLY; + break; +#endif + case EXT3_IOC32_GETRSVSZ: + cmd = EXT3_IOC_GETRSVSZ; + break; + case EXT3_IOC32_SETRSVSZ: + cmd = EXT3_IOC_SETRSVSZ; + break; + case EXT3_IOC_GROUP_ADD: + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 85d132c37ee0..235e77b52ea5 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "namei.h" @@ -870,7 +871,7 @@ restart: bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ, 1, &bh); + ll_rw_block(READ_META, 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 698b85bb1dd4..3e50a4166283 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -20,6 +20,7 @@ #include #include #include +#include #include static inline loff_t fat_make_i_pos(struct super_block *sb, @@ -741,10 +742,65 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp, return ret; } +#ifdef CONFIG_COMPAT +#define VFAT_IOCTL_READDIR_BOTH32 _IOR('r', 1, struct compat_dirent[2]) +#define VFAT_IOCTL_READDIR_SHORT32 _IOR('r', 2, struct compat_dirent[2]) + +static long fat_compat_put_dirent32(struct dirent *d, + struct compat_dirent __user *d32) +{ + if (!access_ok(VERIFY_WRITE, d32, sizeof(struct compat_dirent))) + return -EFAULT; + + __put_user(d->d_ino, &d32->d_ino); + __put_user(d->d_off, &d32->d_off); + __put_user(d->d_reclen, &d32->d_reclen); + if (__copy_to_user(d32->d_name, d->d_name, d->d_reclen)) + return -EFAULT; + + return 0; +} + +static long fat_compat_dir_ioctl(struct file *file, unsigned cmd, + unsigned long arg) +{ + struct compat_dirent __user *p = compat_ptr(arg); + int ret; + mm_segment_t oldfs = get_fs(); + struct dirent d[2]; + + switch (cmd) { + case VFAT_IOCTL_READDIR_BOTH32: + cmd = VFAT_IOCTL_READDIR_BOTH; + break; + case VFAT_IOCTL_READDIR_SHORT32: + cmd = VFAT_IOCTL_READDIR_SHORT; + break; + default: + return -ENOIOCTLCMD; + } + + set_fs(KERNEL_DS); + lock_kernel(); + ret = fat_dir_ioctl(file->f_dentry->d_inode, file, + cmd, (unsigned long) &d); + unlock_kernel(); + set_fs(oldfs); + if (ret >= 0) { + ret |= fat_compat_put_dirent32(&d[0], p); + ret |= fat_compat_put_dirent32(&d[1], p + 1); + } + return ret; +} +#endif /* CONFIG_COMPAT */ + const struct file_operations fat_dir_operations = { .read = generic_read_dir, .readdir = fat_readdir, .ioctl = fat_dir_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = fat_compat_dir_ioctl, +#endif .fsync = file_fsync, }; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 892643dc9af1..c403b66ec83c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -22,8 +22,7 @@ #include #include #include - -extern struct super_block *blockdev_superblock; +#include "internal.h" /** * __mark_inode_dirty - internal function @@ -320,7 +319,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (!bdi_cap_writeback_dirty(bdi)) { list_move(&inode->i_list, &sb->s_dirty); - if (sb == blockdev_superblock) { + if (sb_is_blkdev_sb(sb)) { /* * Dirty memory-backed blockdev: the ramdisk * driver does this. Skip just this inode @@ -337,14 +336,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; - if (sb != blockdev_superblock) + if (!sb_is_blkdev_sb(sb)) break; /* Skip a congested fs */ list_move(&inode->i_list, &sb->s_dirty); continue; /* Skip a congested blockdev */ } if (wbc->bdi && bdi != wbc->bdi) { - if (sb != blockdev_superblock) + if (!sb_is_blkdev_sb(sb)) break; /* fs has the wrong queue */ list_move(&inode->i_list, &sb->s_dirty); continue; /* blockdev has wrong queue */ diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 8a1ca5ef7ada..3915635b4470 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -246,12 +246,8 @@ struct hfsplus_readdir_data { /* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support * chattr/lsattr */ -#define HFSPLUS_IOC_EXT2_GETFLAGS _IOR('f', 1, long) -#define HFSPLUS_IOC_EXT2_SETFLAGS _IOW('f', 2, long) - -#define EXT2_FLAG_IMMUTABLE 0x00000010 /* Immutable file */ -#define EXT2_FLAG_APPEND 0x00000020 /* writes to file may only append */ -#define EXT2_FLAG_NODUMP 0x00000040 /* do not dump file */ +#define HFSPLUS_IOC_EXT2_GETFLAGS FS_IOC_GETFLAGS +#define HFSPLUS_IOC_EXT2_SETFLAGS FS_IOC_SETFLAGS /* diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 13cf848ac833..79fd10402ea3 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -28,11 +28,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, case HFSPLUS_IOC_EXT2_GETFLAGS: flags = 0; if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) - flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */ + flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) - flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */ + flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) - flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */ + flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ return put_user(flags, (int __user *)arg); case HFSPLUS_IOC_EXT2_SETFLAGS: { if (IS_RDONLY(inode)) @@ -44,32 +44,31 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (get_user(flags, (int __user *)arg)) return -EFAULT; - if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) || + if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } /* don't silently ignore unsupported ext2 flags */ - if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND| - EXT2_FLAG_NODUMP)) + if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) return -EOPNOTSUPP; - if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */ + if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ inode->i_flags |= S_IMMUTABLE; HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; } else { inode->i_flags &= ~S_IMMUTABLE; HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; } - if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */ + if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ inode->i_flags |= S_APPEND; HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; } else { inode->i_flags &= ~S_APPEND; HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; } - if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */ + if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; else HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; diff --git a/fs/inode.c b/fs/inode.c index abf77471e6c4..ada7643104e1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -362,27 +362,6 @@ int invalidate_inodes(struct super_block * sb) } EXPORT_SYMBOL(invalidate_inodes); - -int __invalidate_device(struct block_device *bdev) -{ - struct super_block *sb = get_super(bdev); - int res = 0; - - if (sb) { - /* - * no need to lock the super, get_super holds the - * read mutex so the filesystem cannot go away - * under us (->put_super runs with the write lock - * hold). - */ - shrink_dcache_sb(sb); - res = invalidate_inodes(sb); - drop_super(sb); - } - invalidate_bdev(bdev, 0); - return res; -} -EXPORT_SYMBOL(__invalidate_device); static int can_unuse(struct inode *inode) { diff --git a/fs/internal.h b/fs/internal.h new file mode 100644 index 000000000000..ea00126c9a59 --- /dev/null +++ b/fs/internal.h @@ -0,0 +1,55 @@ +/* fs/ internal definitions + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +struct super_block; + +/* + * block_dev.c + */ +#ifdef CONFIG_BLOCK +extern struct super_block *blockdev_superblock; +extern void __init bdev_cache_init(void); + +static inline int sb_is_blkdev_sb(struct super_block *sb) +{ + return sb == blockdev_superblock; +} + +#else +static inline void bdev_cache_init(void) +{ +} + +static inline int sb_is_blkdev_sb(struct super_block *sb) +{ + return 0; +} +#endif + +/* + * char_dev.c + */ +extern void __init chrdev_init(void); + +/* + * compat_ioctl.c + */ +#ifdef CONFIG_COMPAT +extern struct ioctl_trans ioctl_start[]; +extern int ioctl_table_size; +#endif + +/* + * namespace.c + */ +extern int copy_mount_options(const void __user *, unsigned long *); diff --git a/fs/ioprio.c b/fs/ioprio.c index 78b1deae3fa2..6dc6721d9e82 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -1,7 +1,7 @@ /* * fs/ioprio.c * - * Copyright (C) 2004 Jens Axboe + * Copyright (C) 2004 Jens Axboe * * Helper functions for setting/querying io priorities of processes. The * system calls closely mimmick getpriority/setpriority, see the man page for @@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) /* see wmb() in current_io_context() */ smp_read_barrier_depends(); - if (ioc && ioc->set_ioprio) - ioc->set_ioprio(ioc, ioprio); + if (ioc) + ioc->ioprio_changed = 1; task_unlock(task); return 0; @@ -81,7 +81,12 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) } ret = -ESRCH; - read_lock_irq(&tasklist_lock); + /* + * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic", + * so we can't use rcu_read_lock(). See re-copy of ->ioprio + * in copy_process(). + */ + read_lock(&tasklist_lock); switch (which) { case IOPRIO_WHO_PROCESS: if (!who) @@ -124,7 +129,7 @@ free_uid: ret = -EINVAL; } - read_unlock_irq(&tasklist_lock); + read_unlock(&tasklist_lock); return ret; } @@ -170,7 +175,7 @@ asmlinkage long sys_ioprio_get(int which, int who) int ret = -ESRCH; int tmpio; - read_lock_irq(&tasklist_lock); + read_lock(&tasklist_lock); switch (which) { case IOPRIO_WHO_PROCESS: if (!who) @@ -221,7 +226,7 @@ asmlinkage long sys_ioprio_get(int which, int who) ret = -EINVAL; } - read_unlock_irq(&tasklist_lock); + read_unlock(&tasklist_lock); return ret; } diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 67b3774820eb..37db52488262 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -22,13 +21,13 @@ static struct { long jfs_flag; long ext2_flag; } jfs_map[] = { - {JFS_NOATIME_FL, EXT2_NOATIME_FL}, - {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL}, - {JFS_SYNC_FL, EXT2_SYNC_FL}, - {JFS_SECRM_FL, EXT2_SECRM_FL}, - {JFS_UNRM_FL, EXT2_UNRM_FL}, - {JFS_APPEND_FL, EXT2_APPEND_FL}, - {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL}, + {JFS_NOATIME_FL, FS_NOATIME_FL}, + {JFS_DIRSYNC_FL, FS_DIRSYNC_FL}, + {JFS_SYNC_FL, FS_SYNC_FL}, + {JFS_SECRM_FL, FS_SECRM_FL}, + {JFS_UNRM_FL, FS_UNRM_FL}, + {JFS_APPEND_FL, FS_APPEND_FL}, + {JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL}, {0, 0}, }; diff --git a/fs/mpage.c b/fs/mpage.c index 1e4598247d0b..692a3e578fc8 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -693,6 +693,8 @@ out: * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. + * + * If you fix this you should check generic_writepages() also! */ int mpage_writepages(struct address_space *mapping, diff --git a/fs/namespace.c b/fs/namespace.c index 6ede3a539ed8..66d921e14fee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -24,12 +24,11 @@ #include #include #include +#include #include #include #include "pnode.h" -extern int __init init_rootfs(void); - /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b674462793d3..f6675d2c386c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include diff --git a/fs/no-block.c b/fs/no-block.c new file mode 100644 index 000000000000..d269a93d3467 --- /dev/null +++ b/fs/no-block.c @@ -0,0 +1,22 @@ +/* no-block.c: implementation of routines required for non-BLOCK configuration + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +static int no_blkdev_open(struct inode * inode, struct file * filp) +{ + return -ENODEV; +} + +const struct file_operations def_blk_fops = { + .open = no_blkdev_open, +}; diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile index d713ce6b3e12..67e665fdb7fc 100644 --- a/fs/partitions/Makefile +++ b/fs/partitions/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y := check.o +obj-$(CONFIG_BLOCK) := check.o obj-$(CONFIG_ACORN_PARTITION) += acorn.o obj-$(CONFIG_AMIGA_PARTITION) += amiga.o diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5bbd60896050..66bc425f2f3d 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -277,12 +277,15 @@ static int devinfo_show(struct seq_file *f, void *v) if (i == 0) seq_printf(f, "Character devices:\n"); chrdev_show(f, i); - } else { + } +#ifdef CONFIG_BLOCK + else { i -= CHRDEV_MAJOR_HASH_SIZE; if (i == 0) seq_printf(f, "\nBlock devices:\n"); blkdev_show(f, i); } +#endif return 0; } @@ -355,6 +358,7 @@ static int stram_read_proc(char *page, char **start, off_t off, } #endif +#ifdef CONFIG_BLOCK extern struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { @@ -378,6 +382,7 @@ static struct file_operations proc_diskstats_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif #ifdef CONFIG_MODULES extern struct seq_operations modules_op; @@ -695,7 +700,9 @@ void __init proc_misc_init(void) entry->proc_fops = &proc_kmsg_operations; create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); +#ifdef CONFIG_BLOCK create_seq_entry("partitions", 0, &proc_partitions_operations); +#endif create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); #ifdef CONFIG_SLAB @@ -707,7 +714,9 @@ void __init proc_misc_init(void) create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); +#ifdef CONFIG_BLOCK create_seq_entry("diskstats", 0, &proc_diskstats_operations); +#endif #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); #endif diff --git a/fs/quota.c b/fs/quota.c index d6a2be826e29..b9dae76a0b6e 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -337,6 +337,34 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void return 0; } +/* + * look up a superblock on which quota ops will be performed + * - use the name of a block device to find the superblock thereon + */ +static inline struct super_block *quotactl_block(const char __user *special) +{ +#ifdef CONFIG_BLOCK + struct block_device *bdev; + struct super_block *sb; + char *tmp = getname(special); + + if (IS_ERR(tmp)) + return ERR_PTR(PTR_ERR(tmp)); + bdev = lookup_bdev(tmp); + putname(tmp); + if (IS_ERR(bdev)) + return ERR_PTR(PTR_ERR(bdev)); + sb = get_super(bdev); + bdput(bdev); + if (!sb) + return ERR_PTR(-ENODEV); + + return sb; +#else + return ERR_PTR(-ENODEV); +#endif +} + /* * This is the system call interface. This communicates with * the user-level programs. Currently this only supports diskquota @@ -347,25 +375,15 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t { uint cmds, type; struct super_block *sb = NULL; - struct block_device *bdev; - char *tmp; int ret; cmds = cmd >> SUBCMDSHIFT; type = cmd & SUBCMDMASK; if (cmds != Q_SYNC || special) { - tmp = getname(special); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - bdev = lookup_bdev(tmp); - putname(tmp); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - sb = get_super(bdev); - bdput(bdev); - if (!sb) - return -ENODEV; + sb = quotactl_block(special); + if (IS_ERR(sb)) + return PTR_ERR(sb); } ret = check_quotactl_valid(sb, type, cmds, id); diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 9aabcc0ccd2d..657050ad7430 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -22,6 +22,9 @@ const struct file_operations reiserfs_dir_operations = { .readdir = reiserfs_readdir, .fsync = reiserfs_dir_fsync, .ioctl = reiserfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = reiserfs_compat_ioctl, +#endif }; static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1cfbe857ba27..3e08f7161a3d 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -2,6 +2,7 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ +#include #include #include #include @@ -1568,6 +1569,9 @@ const struct file_operations reiserfs_file_operations = { .read = generic_file_read, .write = reiserfs_file_write, .ioctl = reiserfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = reiserfs_compat_ioctl, +#endif .mmap = generic_file_mmap, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index a986b5e1e288..9c57578cb831 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -9,6 +9,7 @@ #include #include #include +#include static int reiserfs_unpack(struct inode *inode, struct file *filp); @@ -94,6 +95,40 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, } } +#ifdef CONFIG_COMPAT +long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case REISERFS_IOC32_UNPACK: + cmd = REISERFS_IOC_UNPACK; + break; + case REISERFS_IOC32_GETFLAGS: + cmd = REISERFS_IOC_GETFLAGS; + break; + case REISERFS_IOC32_SETFLAGS: + cmd = REISERFS_IOC_SETFLAGS; + break; + case REISERFS_IOC32_GETVERSION: + cmd = REISERFS_IOC_GETVERSION; + break; + case REISERFS_IOC32_SETVERSION: + cmd = REISERFS_IOC_SETVERSION; + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif + /* ** reiserfs_unpack ** Function try to convert tail from direct item into indirect. diff --git a/fs/splice.c b/fs/splice.c index 684bca3d3a10..13e92dd19fbb 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -12,7 +12,7 @@ * Jens to support splicing to files, network, direct splicing, etc and * fixing lots of bugs. * - * Copyright (C) 2005-2006 Jens Axboe + * Copyright (C) 2005-2006 Jens Axboe * Copyright (C) 2005-2006 Linus Torvalds * Copyright (C) 2006 Ingo Molnar * diff --git a/fs/super.c b/fs/super.c index 6987824d0dce..aec99ddbe53f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -220,6 +220,37 @@ static int grab_super(struct super_block *s) __releases(sb_lock) return 0; } +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. Requires a second blkdev + * flush by the caller to complete the operation. + */ +void __fsync_super(struct super_block *sb) +{ + sync_inodes_sb(sb, 0); + DQUOT_SYNC(sb); + lock_super(sb); + if (sb->s_dirt && sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + sync_blockdev(sb->s_bdev); + sync_inodes_sb(sb, 1); +} + +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int fsync_super(struct super_block *sb) +{ + __fsync_super(sb); + return sync_blockdev(sb->s_bdev); +} + /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill @@ -540,8 +571,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; +#ifdef CONFIG_BLOCK if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; +#endif if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); @@ -661,6 +694,7 @@ void kill_litter_super(struct super_block *sb) EXPORT_SYMBOL(kill_litter_super); +#ifdef CONFIG_BLOCK static int set_bdev_super(struct super_block *s, void *data) { s->s_bdev = data; @@ -756,6 +790,7 @@ void kill_block_super(struct super_block *sb) } EXPORT_SYMBOL(kill_block_super); +#endif int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, diff --git a/fs/sync.c b/fs/sync.c index 955aef04da28..1de747b5ddb9 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -10,10 +10,123 @@ #include #include #include +#include +#include #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) +/* + * sync everything. Start out by waking pdflush, because that writes back + * all queues in parallel. + */ +static void do_sync(unsigned long wait) +{ + wakeup_pdflush(0); + sync_inodes(0); /* All mappings, inodes and their blockdevs */ + DQUOT_SYNC(NULL); + sync_supers(); /* Write the superblocks */ + sync_filesystems(0); /* Start syncing the filesystems */ + sync_filesystems(wait); /* Waitingly sync the filesystems */ + sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ + if (!wait) + printk("Emergency Sync complete\n"); + if (unlikely(laptop_mode)) + laptop_sync_completion(); +} + +asmlinkage long sys_sync(void) +{ + do_sync(1); + return 0; +} + +void emergency_sync(void) +{ + pdflush_operation(do_sync, 0); +} + +/* + * Generic function to fsync a file. + * + * filp may be NULL if called via the msync of a vma. + */ +int file_fsync(struct file *filp, struct dentry *dentry, int datasync) +{ + struct inode * inode = dentry->d_inode; + struct super_block * sb; + int ret, err; + + /* sync the inode to buffers */ + ret = write_inode_now(inode, 0); + + /* sync the superblock to buffers */ + sb = inode->i_sb; + lock_super(sb); + if (sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + + /* .. finally sync the buffers to disk */ + err = sync_blockdev(sb->s_bdev); + if (!ret) + ret = err; + return ret; +} + +long do_fsync(struct file *file, int datasync) +{ + int ret; + int err; + struct address_space *mapping = file->f_mapping; + + if (!file->f_op || !file->f_op->fsync) { + /* Why? We can still call filemap_fdatawrite */ + ret = -EINVAL; + goto out; + } + + ret = filemap_fdatawrite(mapping); + + /* + * We need to protect against concurrent writers, which could cause + * livelocks in fsync_buffers_list(). + */ + mutex_lock(&mapping->host->i_mutex); + err = file->f_op->fsync(file, file->f_dentry, datasync); + if (!ret) + ret = err; + mutex_unlock(&mapping->host->i_mutex); + err = filemap_fdatawait(mapping); + if (!ret) + ret = err; +out: + return ret; +} + +static long __do_fsync(unsigned int fd, int datasync) +{ + struct file *file; + int ret = -EBADF; + + file = fget(fd); + if (file) { + ret = do_fsync(file, datasync); + fput(file); + } + return ret; +} + +asmlinkage long sys_fsync(unsigned int fd) +{ + return __do_fsync(fd, 0); +} + +asmlinkage long sys_fdatasync(unsigned int fd) +{ + return __do_fsync(fd, 1); +} + /* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 26b364c9d62c..35115bca036e 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,5 +1,6 @@ config XFS_FS tristate "XFS filesystem support" + depends on BLOCK help XFS is a high performance journaling filesystem which originated on the SGI IRIX platform. It is completely multi-threaded, can diff --git a/include/linux/bio.h b/include/linux/bio.h index 76bdaeab6f62..711c321a7011 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -148,6 +148,7 @@ struct bio { #define BIO_RW_BARRIER 2 #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 +#define BIO_RW_META 5 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -178,6 +179,7 @@ struct bio { #define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) +#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) /* * will die diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfde8b3ee919..1d79b8d4ca6d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1,6 +1,7 @@ #ifndef _LINUX_BLKDEV_H #define _LINUX_BLKDEV_H +#include #include #include #include @@ -16,6 +17,22 @@ #include +#ifdef CONFIG_LBD +# include +# define sector_div(a, b) do_div(a, b) +#else +# define sector_div(n, b)( \ +{ \ + int _res; \ + _res = (n) % (b); \ + (n) /= (b); \ + _res; \ +} \ +) +#endif + +#ifdef CONFIG_BLOCK + struct scsi_ioctl_command; struct request_queue; @@ -90,7 +107,7 @@ struct io_context { atomic_t refcount; struct task_struct *task; - int (*set_ioprio)(struct io_context *, unsigned int); + unsigned int ioprio_changed; /* * For request batching @@ -104,8 +121,7 @@ struct io_context { void put_io_context(struct io_context *ioc); void exit_io_context(void); -struct io_context *current_io_context(gfp_t gfp_flags); -struct io_context *get_io_context(gfp_t gfp_flags); +struct io_context *get_io_context(gfp_t gfp_flags, int node); void copy_io_context(struct io_context **pdst, struct io_context **psrc); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); @@ -120,6 +136,90 @@ struct request_list { wait_queue_head_t wait[2]; }; +/* + * request command types + */ +enum rq_cmd_type_bits { + REQ_TYPE_FS = 1, /* fs request */ + REQ_TYPE_BLOCK_PC, /* scsi command */ + REQ_TYPE_SENSE, /* sense request */ + REQ_TYPE_PM_SUSPEND, /* suspend request */ + REQ_TYPE_PM_RESUME, /* resume request */ + REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ + REQ_TYPE_FLUSH, /* flush request */ + REQ_TYPE_SPECIAL, /* driver defined type */ + REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ + /* + * for ATA/ATAPI devices. this really doesn't belong here, ide should + * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver + * private REQ_LB opcodes to differentiate what type of request this is + */ + REQ_TYPE_ATA_CMD, + REQ_TYPE_ATA_TASK, + REQ_TYPE_ATA_TASKFILE, +}; + +/* + * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being + * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a + * SCSI cdb. + * + * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, + * typically to differentiate REQ_TYPE_SPECIAL requests. + * + */ +enum { + /* + * just examples for now + */ + REQ_LB_OP_EJECT = 0x40, /* eject request */ + REQ_LB_OP_FLUSH = 0x41, /* flush device */ +}; + +/* + * request type modified bits. first three bits match BIO_RW* bits, important + */ +enum rq_flag_bits { + __REQ_RW, /* not set, read. set, write */ + __REQ_FAILFAST, /* no low level driver retries */ + __REQ_SORTED, /* elevator knows about this request */ + __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ + __REQ_HARDBARRIER, /* may not be passed by drive either */ + __REQ_FUA, /* forced unit access */ + __REQ_NOMERGE, /* don't touch this for merging */ + __REQ_STARTED, /* drive already may have started this one */ + __REQ_DONTPREP, /* don't call prep for this one */ + __REQ_QUEUED, /* uses queueing */ + __REQ_ELVPRIV, /* elevator private data attached */ + __REQ_FAILED, /* set if the request failed */ + __REQ_QUIET, /* don't worry about errors */ + __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_ORDERED_COLOR, /* is before or after barrier */ + __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ + __REQ_ALLOCED, /* request came from our alloc pool */ + __REQ_RW_META, /* metadata io request */ + __REQ_NR_BITS, /* stops here */ +}; + +#define REQ_RW (1 << __REQ_RW) +#define REQ_FAILFAST (1 << __REQ_FAILFAST) +#define REQ_SORTED (1 << __REQ_SORTED) +#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) +#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) +#define REQ_FUA (1 << __REQ_FUA) +#define REQ_NOMERGE (1 << __REQ_NOMERGE) +#define REQ_STARTED (1 << __REQ_STARTED) +#define REQ_DONTPREP (1 << __REQ_DONTPREP) +#define REQ_QUEUED (1 << __REQ_QUEUED) +#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) +#define REQ_FAILED (1 << __REQ_FAILED) +#define REQ_QUIET (1 << __REQ_QUIET) +#define REQ_PREEMPT (1 << __REQ_PREEMPT) +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) +#define REQ_RW_SYNC (1 << __REQ_RW_SYNC) +#define REQ_ALLOCED (1 << __REQ_ALLOCED) +#define REQ_RW_META (1 << __REQ_RW_META) + #define BLK_MAX_CDB 16 /* @@ -129,30 +229,46 @@ struct request { struct list_head queuelist; struct list_head donelist; - unsigned long flags; /* see REQ_ bits below */ + request_queue_t *q; + + unsigned int cmd_flags; + enum rq_cmd_type_bits cmd_type; /* Maintain bio traversal state for part by part I/O submission. * hard_* are block layer internals, no driver should touch them! */ sector_t sector; /* next sector to submit */ + sector_t hard_sector; /* next sector to complete */ unsigned long nr_sectors; /* no. of sectors left to submit */ + unsigned long hard_nr_sectors; /* no. of sectors left to complete */ /* no. of sectors left to submit in the current segment */ unsigned int current_nr_sectors; - sector_t hard_sector; /* next sector to complete */ - unsigned long hard_nr_sectors; /* no. of sectors left to complete */ /* no. of sectors left to complete in the current segment */ unsigned int hard_cur_sectors; struct bio *bio; struct bio *biotail; - void *elevator_private; - void *completion_data; + struct hlist_node hash; /* merge hash */ + /* + * The rb_node is only used inside the io scheduler, requests + * are pruned when moved to the dispatch queue. So let the + * completion_data share space with the rb_node. + */ + union { + struct rb_node rb_node; /* sort/lookup */ + void *completion_data; + }; + + /* + * two pointers are available for the IO schedulers, if they need + * more they have to dynamically allocate it. + */ + void *elevator_private; + void *elevator_private2; - int rq_status; /* should split this into a few status bits */ - int errors; struct gendisk *rq_disk; unsigned long start_time; @@ -170,16 +286,14 @@ struct request { unsigned short ioprio; - int tag; - - int ref_count; - request_queue_t *q; - struct request_list *rl; - - struct completion *waiting; void *special; char *buffer; + int tag; + int errors; + + int ref_count; + /* * when request is used as a packet command carrier */ @@ -195,80 +309,14 @@ struct request { int retries; /* - * completion callback. end_io_data should be folded in with waiting + * completion callback. */ rq_end_io_fn *end_io; void *end_io_data; }; /* - * first three bits match BIO_RW* bits, important - */ -enum rq_flag_bits { - __REQ_RW, /* not set, read. set, write */ - __REQ_FAILFAST, /* no low level driver retries */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_HARDBARRIER, /* may not be passed by drive either */ - __REQ_FUA, /* forced unit access */ - __REQ_CMD, /* is a regular fs rw request */ - __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - /* - * for ATA/ATAPI devices - */ - __REQ_PC, /* packet command (special) */ - __REQ_BLOCK_PC, /* queued down pc from block layer */ - __REQ_SENSE, /* sense retrival */ - - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_SPECIAL, /* driver suplied command */ - __REQ_DRIVE_CMD, - __REQ_DRIVE_TASK, - __REQ_DRIVE_TASKFILE, - __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_PM_SUSPEND, /* suspend request */ - __REQ_PM_RESUME, /* resume request */ - __REQ_PM_SHUTDOWN, /* shutdown request */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_RW_SYNC, /* request is sync (O_DIRECT) */ - __REQ_NR_BITS, /* stops here */ -}; - -#define REQ_RW (1 << __REQ_RW) -#define REQ_FAILFAST (1 << __REQ_FAILFAST) -#define REQ_SORTED (1 << __REQ_SORTED) -#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) -#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) -#define REQ_FUA (1 << __REQ_FUA) -#define REQ_CMD (1 << __REQ_CMD) -#define REQ_NOMERGE (1 << __REQ_NOMERGE) -#define REQ_STARTED (1 << __REQ_STARTED) -#define REQ_DONTPREP (1 << __REQ_DONTPREP) -#define REQ_QUEUED (1 << __REQ_QUEUED) -#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) -#define REQ_PC (1 << __REQ_PC) -#define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC) -#define REQ_SENSE (1 << __REQ_SENSE) -#define REQ_FAILED (1 << __REQ_FAILED) -#define REQ_QUIET (1 << __REQ_QUIET) -#define REQ_SPECIAL (1 << __REQ_SPECIAL) -#define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD) -#define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK) -#define REQ_DRIVE_TASKFILE (1 << __REQ_DRIVE_TASKFILE) -#define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND) -#define REQ_PM_RESUME (1 << __REQ_PM_RESUME) -#define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) -#define REQ_RW_SYNC (1 << __REQ_RW_SYNC) - -/* - * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME + * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME * requests. Some step values could eventually be made generic. */ struct request_pm_state @@ -432,9 +480,6 @@ struct request_queue struct mutex sysfs_lock; }; -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 - #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ @@ -490,25 +535,34 @@ enum { #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) -#define blk_fs_request(rq) ((rq)->flags & REQ_CMD) -#define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) -#define blk_noretry_request(rq) ((rq)->flags & REQ_FAILFAST) -#define blk_rq_started(rq) ((rq)->flags & REQ_STARTED) +#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) +#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) +#define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) +#define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) + +#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) +#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) -#define blk_pm_suspend_request(rq) ((rq)->flags & REQ_PM_SUSPEND) -#define blk_pm_resume_request(rq) ((rq)->flags & REQ_PM_RESUME) +#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) +#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) #define blk_pm_request(rq) \ - ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) + (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) -#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED) -#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) -#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA) +#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) +#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) +#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) ((rq)->flags & 1) +#define rq_data_dir(rq) ((rq)->cmd_flags & 1) + +/* + * We regard a request as sync, if it's a READ or a SYNC write. + */ +#define rq_is_sync(rq) (rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC) +#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) static inline int blk_queue_full(struct request_queue *q, int rw) { @@ -541,13 +595,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) #define RQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ - (!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) - -/* - * noop, requests are automagically marked as active/inactive by I/O - * scheduler -- see elv_next_request - */ -#define blk_queue_headactive(q, head_active) + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) /* * q->prep_rq_fn return values @@ -586,11 +634,6 @@ static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) -struct sec_size { - unsigned block_size; - unsigned block_size_bits; -}; - extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); @@ -612,6 +655,7 @@ extern void blk_stop_queue(request_queue_t *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(request_queue_t *q); extern void blk_run_queue(request_queue_t *); +extern void blk_start_queueing(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); extern int blk_rq_unmap_user(struct bio *, unsigned int); @@ -655,16 +699,6 @@ extern void end_that_request_last(struct request *, int); extern void end_request(struct request *req, int uptodate); extern void blk_complete_request(struct request *); -static inline int rq_all_done(struct request *rq, unsigned int nr_bytes) -{ - if (blk_fs_request(rq)) - return (nr_bytes >= (rq->hard_nr_sectors << 9)); - else if (blk_pc_request(rq)) - return nr_bytes >= rq->data_len; - - return 0; -} - /* * end_that_request_first/chunk() takes an uptodate argument. we account * any value <= as an io error. 0 means -EIO for compatability reasons, @@ -678,21 +712,6 @@ static inline void blkdev_dequeue_request(struct request *req) elv_dequeue_request(req->q, req); } -/* - * This should be in elevator.h, but that requires pulling in rq and q - */ -static inline void elv_dispatch_add_tail(struct request_queue *q, - struct request *rq) -{ - if (q->last_merge == rq) - q->last_merge = NULL; - q->nr_sorted--; - - q->end_sector = rq_end_sector(rq); - q->boundary_rq = rq; - list_add_tail(&rq->queuelist, &q->queue_head); -} - /* * Access functions for manipulating queue properties */ @@ -737,7 +756,7 @@ extern void blk_put_queue(request_queue_t *); */ #define blk_queue_tag_depth(q) ((q)->queue_tags->busy) #define blk_queue_tag_queue(q) ((q)->queue_tags->busy < (q)->queue_tags->max_depth) -#define blk_rq_tagged(rq) ((rq)->flags & REQ_QUEUED) +#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) extern int blk_queue_start_tag(request_queue_t *, struct request *); extern struct request *blk_queue_find_tag(request_queue_t *, int); extern void blk_queue_end_tag(request_queue_t *, struct request *); @@ -787,14 +806,6 @@ static inline int queue_dma_alignment(request_queue_t *q) return retval; } -static inline int bdev_dma_aligment(struct block_device *bdev) -{ - return queue_dma_alignment(bdev_get_queue(bdev)); -} - -#define blk_finished_io(nsects) do { } while (0) -#define blk_started_io(nsects) do { } while (0) - /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { @@ -824,24 +835,32 @@ struct work_struct; int kblockd_schedule_work(struct work_struct *work); void kblockd_flush(void); -#ifdef CONFIG_LBD -# include -# define sector_div(a, b) do_div(a, b) -#else -# define sector_div(n, b)( \ -{ \ - int _res; \ - _res = (n) % (b); \ - (n) /= (b); \ - _res; \ -} \ -) -#endif - #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") +#else /* CONFIG_BLOCK */ +/* + * stubs for when the block layer is configured out + */ +#define buffer_heads_over_limit 0 + +static inline long blk_congestion_wait(int rw, long timeout) +{ + return io_schedule_timeout(timeout); +} + +static inline long nr_blockdev_pages(void) +{ + return 0; +} + +static inline void exit_io_context(void) +{ +} + +#endif /* CONFIG_BLOCK */ + #endif diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7520cc1ff9e2..b99a714fcac6 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -20,6 +20,7 @@ enum blktrace_cat { BLK_TC_PC = 1 << 9, /* pc requests */ BLK_TC_NOTIFY = 1 << 10, /* special message */ BLK_TC_AHEAD = 1 << 11, /* readahead */ + BLK_TC_META = 1 << 12, /* metadata */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; @@ -148,7 +149,7 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, u32 what) { struct blk_trace *bt = q->blk_trace; - int rw = rq->flags & 0x03; + int rw = rq->cmd_flags & 0x03; if (likely(!bt)) return; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 737e407d0cd1..131ffd37e716 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -14,6 +14,8 @@ #include #include +#ifdef CONFIG_BLOCK + enum bh_state_bits { BH_Uptodate, /* Contains valid data */ BH_Dirty, /* Is dirty */ @@ -190,9 +192,7 @@ extern int buffer_heads_over_limit; * Generic address_space_operations implementations for buffer_head-backed * address_spaces. */ -int try_to_release_page(struct page * page, gfp_t gfp_mask); void block_invalidatepage(struct page *page, unsigned long offset); -void do_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); @@ -302,4 +302,19 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +extern int __set_page_dirty_buffers(struct page *page); + +#else /* CONFIG_BLOCK */ + +static inline void buffer_init(void) {} +static inline int try_to_free_buffers(struct page *page) { return 1; } +static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline int inode_has_buffers(struct inode *inode) { return 0; } +static inline void invalidate_inode_buffers(struct inode *inode) {} +static inline int remove_inode_buffers(struct inode *inode) { return 1; } +static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } +static inline void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) {} + + +#endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index bea0255196c4..d61ef5951538 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -90,6 +90,7 @@ COMPATIBLE_IOCTL(FDTWADDLE) COMPATIBLE_IOCTL(FDFMTTRK) COMPATIBLE_IOCTL(FDRAWCMD) /* 0x12 */ +#ifdef CONFIG_BLOCK COMPATIBLE_IOCTL(BLKRASET) COMPATIBLE_IOCTL(BLKROSET) COMPATIBLE_IOCTL(BLKROGET) @@ -103,6 +104,7 @@ COMPATIBLE_IOCTL(BLKTRACESETUP) COMPATIBLE_IOCTL(BLKTRACETEARDOWN) ULONG_IOCTL(BLKRASET) ULONG_IOCTL(BLKFRASET) +#endif /* RAID */ COMPATIBLE_IOCTL(RAID_VERSION) COMPATIBLE_IOCTL(GET_ARRAY_INFO) @@ -395,12 +397,6 @@ COMPATIBLE_IOCTL(DVD_WRITE_STRUCT) COMPATIBLE_IOCTL(DVD_AUTH) /* pktcdvd */ COMPATIBLE_IOCTL(PACKET_CTRL_CMD) -/* Big L */ -ULONG_IOCTL(LOOP_SET_FD) -ULONG_IOCTL(LOOP_CHANGE_FD) -COMPATIBLE_IOCTL(LOOP_CLR_FD) -COMPATIBLE_IOCTL(LOOP_GET_STATUS64) -COMPATIBLE_IOCTL(LOOP_SET_STATUS64) /* Big A */ /* sparc only */ /* Big Q for sound/OSS */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1713ace808bf..b3370ef5164d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -1,12 +1,16 @@ #ifndef _LINUX_ELEVATOR_H #define _LINUX_ELEVATOR_H +#include + +#ifdef CONFIG_BLOCK + typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct bio *); typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *); -typedef void (elevator_merged_fn) (request_queue_t *, struct request *); +typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int); typedef int (elevator_dispatch_fn) (request_queue_t *, int); @@ -14,9 +18,9 @@ typedef void (elevator_add_req_fn) (request_queue_t *, struct request *); typedef int (elevator_queue_empty_fn) (request_queue_t *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); -typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *); +typedef int (elevator_may_queue_fn) (request_queue_t *, int); -typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t); +typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, gfp_t); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *); typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *); @@ -82,19 +86,21 @@ struct elevator_queue struct kobject kobj; struct elevator_type *elevator_type; struct mutex sysfs_lock; + struct hlist_head *hash; }; /* * block elevator interface */ extern void elv_dispatch_sort(request_queue_t *, struct request *); +extern void elv_dispatch_add_tail(request_queue_t *, struct request *); extern void elv_add_request(request_queue_t *, struct request *, int, int); extern void __elv_add_request(request_queue_t *, struct request *, int, int); extern void elv_insert(request_queue_t *, struct request *, int); extern int elv_merge(request_queue_t *, struct request **, struct bio *); extern void elv_merge_requests(request_queue_t *, struct request *, struct request *); -extern void elv_merged_request(request_queue_t *, struct request *); +extern void elv_merged_request(request_queue_t *, struct request *, int); extern void elv_dequeue_request(request_queue_t *, struct request *); extern void elv_requeue_request(request_queue_t *, struct request *); extern int elv_queue_empty(request_queue_t *); @@ -103,9 +109,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *); extern struct request *elv_latter_request(request_queue_t *, struct request *); extern int elv_register_queue(request_queue_t *q); extern void elv_unregister_queue(request_queue_t *q); -extern int elv_may_queue(request_queue_t *, int, struct bio *); +extern int elv_may_queue(request_queue_t *, int); extern void elv_completed_request(request_queue_t *, struct request *); -extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t); +extern int elv_set_request(request_queue_t *, struct request *, gfp_t); extern void elv_put_request(request_queue_t *, struct request *); /* @@ -124,6 +130,19 @@ extern int elevator_init(request_queue_t *, char *); extern void elevator_exit(elevator_t *); extern int elv_rq_merge_ok(struct request *, struct bio *); +/* + * Helper functions. + */ +extern struct request *elv_rb_former_request(request_queue_t *, struct request *); +extern struct request *elv_rb_latter_request(request_queue_t *, struct request *); + +/* + * rb support functions. + */ +extern struct request *elv_rb_add(struct rb_root *, struct request *); +extern void elv_rb_del(struct rb_root *, struct request *); +extern struct request *elv_rb_find(struct rb_root *, sector_t); + /* * Return values from elevator merger */ @@ -149,5 +168,42 @@ enum { }; #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) +#define rb_entry_rq(node) rb_entry((node), struct request, rb_node) +/* + * Hack to reuse the donelist list_head as the fifo time holder while + * the request is in the io scheduler. Saves an unsigned long in rq. + */ +#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) +#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) +#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) +#define rq_fifo_clear(rq) do { \ + list_del_init(&(rq)->queuelist); \ + INIT_LIST_HEAD(&(rq)->donelist); \ + } while (0) + +/* + * io context count accounting + */ +#define elv_ioc_count_mod(name, __val) \ + do { \ + preempt_disable(); \ + __get_cpu_var(name) += (__val); \ + preempt_enable(); \ + } while (0) + +#define elv_ioc_count_inc(name) elv_ioc_count_mod(name, 1) +#define elv_ioc_count_dec(name) elv_ioc_count_mod(name, -1) + +#define elv_ioc_count_read(name) \ +({ \ + unsigned long __val = 0; \ + int __cpu; \ + smp_wmb(); \ + for_each_possible_cpu(__cpu) \ + __val += per_cpu(name, __cpu); \ + __val; \ +}) + +#endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index 33a1aa107329..153d755376a4 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -165,41 +165,49 @@ struct ext2_group_desc #define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) /* - * Inode flags + * Inode flags (GETFLAGS/SETFLAGS) */ -#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ -#define EXT2_UNRM_FL 0x00000002 /* Undelete */ -#define EXT2_COMPR_FL 0x00000004 /* Compress file */ -#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT2_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ -#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ -#define EXT2_NOATIME_FL 0x00000080 /* do not update atime */ +#define EXT2_SECRM_FL FS_SECRM_FL /* Secure deletion */ +#define EXT2_UNRM_FL FS_UNRM_FL /* Undelete */ +#define EXT2_COMPR_FL FS_COMPR_FL /* Compress file */ +#define EXT2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ +#define EXT2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ +#define EXT2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ +#define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ +#define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ /* Reserved for compression usage... */ -#define EXT2_DIRTY_FL 0x00000100 -#define EXT2_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define EXT2_NOCOMP_FL 0x00000400 /* Don't compress */ -#define EXT2_ECOMPR_FL 0x00000800 /* Compression error */ +#define EXT2_DIRTY_FL FS_DIRTY_FL +#define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ +#define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ +#define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ /* End compression flags --- maybe not all used */ -#define EXT2_BTREE_FL 0x00001000 /* btree format dir */ -#define EXT2_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define EXT2_IMAGIC_FL 0x00002000 /* AFS directory */ -#define EXT2_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ -#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -#define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ +#define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */ +#define EXT2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ +#define EXT2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ +#define EXT2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ +#define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ +#define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ +#define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ -#define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -#define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ /* * ioctl commands */ -#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) -#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) -#define EXT2_IOC_GETVERSION _IOR('v', 1, long) -#define EXT2_IOC_SETVERSION _IOW('v', 2, long) +#define EXT2_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT2_IOC_GETVERSION FS_IOC_GETVERSION +#define EXT2_IOC_SETVERSION FS_IOC_SETVERSION + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT2_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT2_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT2_IOC32_GETVERSION FS_IOC32_GETVERSION +#define EXT2_IOC32_SETVERSION FS_IOC32_SETVERSION /* * Structure of an inode on the disk diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index cc08f56750da..11cca1bdc0c7 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -216,20 +216,37 @@ struct ext3_new_group_data { /* * ioctl commands */ -#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) -#define EXT3_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT3_IOC_GETVERSION _IOR('f', 3, long) #define EXT3_IOC_SETVERSION _IOW('f', 4, long) #define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) #define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) -#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) -#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) +#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) +/* + * ioctl commands in 32 bit emulation + */ +#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#ifdef CONFIG_JBD_DEBUG +#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#endif +#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + + /* * Mount options */ @@ -812,6 +829,7 @@ extern void ext3_set_aops(struct inode *inode); /* ioctl.c */ extern int ext3_ioctl (struct inode *, struct file *, unsigned int, unsigned long); +extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long); /* namei.c */ extern int ext3_orphan_add(handle_t *, struct inode *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6eafbe309483..5baf3a153403 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,8 +79,8 @@ extern int dir_notify_enable; #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ -#define SPECIAL 4 /* For non-blockdevice requests in request queue */ #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) +#define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) @@ -217,6 +217,45 @@ extern int dir_notify_enable; #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FS_IOC_GETFLAGS _IOR('f', 1, long) +#define FS_IOC_SETFLAGS _IOW('f', 2, long) +#define FS_IOC_GETVERSION _IOR('v', 1, long) +#define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC32_GETFLAGS _IOR('f', 1, int) +#define FS_IOC32_SETFLAGS _IOW('f', 2, int) +#define FS_IOC32_GETVERSION _IOR('v', 1, int) +#define FS_IOC32_SETVERSION _IOW('v', 2, int) + +/* + * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) + */ +#define FS_SECRM_FL 0x00000001 /* Secure deletion */ +#define FS_UNRM_FL 0x00000002 /* Undelete */ +#define FS_COMPR_FL 0x00000004 /* Compress file */ +#define FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define FS_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define FS_DIRTY_FL 0x00000100 +#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ +#define FS_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define FS_BTREE_FL 0x00001000 /* btree format dir */ +#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ +#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ +#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + + #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 @@ -1443,6 +1482,7 @@ extern void __init vfs_caches_init(unsigned long); extern void putname(const char *name); #endif +#ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern int unregister_blkdev(unsigned int, const char *); extern struct block_device *bdget(dev_t); @@ -1451,11 +1491,15 @@ extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); extern struct block_device *open_partition_by_devnum(dev_t, unsigned); -extern const struct file_operations def_blk_fops; extern const struct address_space_operations def_blk_aops; +#else +static inline void bd_forget(struct inode *inode) {} +#endif +extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; extern const struct file_operations def_fifo_fops; +#ifdef CONFIG_BLOCK extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); @@ -1471,6 +1515,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *); #define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) #define bd_release_from_disk(bdev, disk) bd_release(bdev) #endif +#endif /* fs/char_dev.c */ #define CHRDEV_MAJOR_HASH_SIZE 255 @@ -1484,14 +1529,19 @@ extern int chrdev_open(struct inode *, struct file *); extern void chrdev_show(struct seq_file *,off_t); /* fs/block_dev.c */ -#define BLKDEV_MAJOR_HASH_SIZE 255 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ + +#ifdef CONFIG_BLOCK +#define BLKDEV_MAJOR_HASH_SIZE 255 extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern struct block_device *open_bdev_excl(const char *, int, void *); extern void close_bdev_excl(struct block_device *); extern void blkdev_show(struct seq_file *,off_t); +#else +#define BLKDEV_MAJOR_HASH_SIZE 0 +#endif extern void init_special_inode(struct inode *, umode_t, dev_t); @@ -1505,6 +1555,7 @@ extern const struct file_operations rdwr_fifo_fops; extern int fs_may_remount_ro(struct super_block *); +#ifdef CONFIG_BLOCK /* * return READ, READA, or WRITE */ @@ -1516,9 +1567,10 @@ extern int fs_may_remount_ro(struct super_block *); #define bio_data_dir(bio) ((bio)->bi_rw & 1) extern int check_disk_change(struct block_device *); -extern int invalidate_inodes(struct super_block *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); +#endif +extern int invalidate_inodes(struct super_block *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); unsigned long invalidate_inode_pages(struct address_space *mapping); @@ -1546,11 +1598,14 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, extern long do_fsync(struct file *file, int datasync); extern void sync_supers(void); extern void sync_filesystems(int wait); +extern void __fsync_super(struct super_block *sb); extern void emergency_sync(void); extern void emergency_remount(void); extern int do_remount_sb(struct super_block *sb, int flags, void *data, int force); +#ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); +#endif extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int, struct nameidata *); extern int generic_permission(struct inode *, int, @@ -1633,9 +1688,11 @@ static inline void insert_inode_hash(struct inode *inode) { extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void file_kill(struct file *f); +#ifdef CONFIG_BLOCK struct bio; extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); +#endif extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); @@ -1716,6 +1773,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos, actor); } +#ifdef CONFIG_BLOCK ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, @@ -1753,6 +1811,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, nr_segs, get_block, end_io, DIO_OWN_LOCKING); } +#endif extern const struct file_operations generic_ro_fops; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e4af57e87c17..41f276fdd185 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -11,6 +11,8 @@ #include +#ifdef CONFIG_BLOCK + enum { /* These three have identical behaviour; use the second one if DOS FDISK gets confused about extended/logical partitions starting past cylinder 1023. */ @@ -420,3 +422,5 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index) #endif #endif + +#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b703b6d4358..4edf1934e5ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -743,7 +743,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long); -int __set_page_dirty_buffers(struct page *page); +extern int try_to_release_page(struct page * page, gfp_t gfp_mask); +extern void do_invalidatepage(struct page *page, unsigned long offset); + int __set_page_dirty_nobuffers(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 3ca880463c47..cc5fb75af78a 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -9,6 +9,7 @@ * (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do * nested includes. Get it right in the .c file). */ +#ifdef CONFIG_BLOCK struct writeback_control; typedef int (writepage_t)(struct page *page, struct writeback_control *wbc); @@ -21,8 +22,4 @@ int mpage_writepages(struct address_space *mapping, int mpage_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); -static inline int -generic_writepages(struct address_space *mapping, struct writeback_control *wbc) -{ - return mpage_writepages(mapping, wbc, NULL); -} +#endif diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index eb3e547c8fee..c588709acbbc 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -53,6 +53,8 @@ #include #include +#ifdef CONFIG_MD + /* * Different major versions are not compatible. * Different minor versions are only downward compatible. @@ -95,5 +97,6 @@ extern void md_new_event(mddev_t *mddev); extern void md_update_sb(mddev_t * mddev); +#endif /* CONFIG_MD */ #endif diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index d28890295852..920b94fe31fa 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -18,6 +18,8 @@ /* and dm-bio-list.h is not under include/linux because.... ??? */ #include "../../../drivers/md/dm-bio-list.h" +#ifdef CONFIG_BLOCK + #define LEVEL_MULTIPATH (-4) #define LEVEL_LINEAR (-1) #define LEVEL_FAULTY (-5) @@ -362,5 +364,6 @@ static inline void safe_put_page(struct page *p) if (p) put_page(p); } +#endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 00b340ba6612..b160fb18e8d6 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -17,5 +17,6 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); extern const struct file_operations ramfs_file_operations; extern struct vm_operations_struct generic_file_vm_ops; +extern int __init init_rootfs(void); #endif diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 8d5382e62c08..344bc3495ddb 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -133,7 +133,7 @@ static inline void rb_set_color(struct rb_node *rb, int color) #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -#define RB_EMPTY_NODE(node) (rb_parent(node) != node) +#define RB_EMPTY_NODE(node) (rb_parent(node) == node) #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) extern void rb_insert_color(struct rb_node *, struct rb_root *); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 28493ffaafe7..9c63abffd7b2 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -807,21 +807,19 @@ struct stat_data_v1 { #define set_sd_v1_first_direct_byte(sdp,v) \ ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) -#include - /* inode flags stored in sd_attrs (nee sd_reserved) */ /* we want common flags to have the same values as in ext2, so chattr(1) will work without problems */ -#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL -#define REISERFS_APPEND_FL EXT2_APPEND_FL -#define REISERFS_SYNC_FL EXT2_SYNC_FL -#define REISERFS_NOATIME_FL EXT2_NOATIME_FL -#define REISERFS_NODUMP_FL EXT2_NODUMP_FL -#define REISERFS_SECRM_FL EXT2_SECRM_FL -#define REISERFS_UNRM_FL EXT2_UNRM_FL -#define REISERFS_COMPR_FL EXT2_COMPR_FL -#define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL +#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL +#define REISERFS_APPEND_FL FS_APPEND_FL +#define REISERFS_SYNC_FL FS_SYNC_FL +#define REISERFS_NOATIME_FL FS_NOATIME_FL +#define REISERFS_NODUMP_FL FS_NODUMP_FL +#define REISERFS_SECRM_FL FS_SECRM_FL +#define REISERFS_UNRM_FL FS_UNRM_FL +#define REISERFS_COMPR_FL FS_COMPR_FL +#define REISERFS_NOTAIL_FL FS_NOTAIL_FL /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ @@ -2163,15 +2161,24 @@ __u32 r5_hash(const signed char *msg, int len); /* prototypes from ioctl.c */ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +long reiserfs_compat_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg); /* ioctl's command */ #define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) /* define following flags to be the same as in ext2, so that chattr(1), lsattr(1) will work with us. */ -#define REISERFS_IOC_GETFLAGS EXT2_IOC_GETFLAGS -#define REISERFS_IOC_SETFLAGS EXT2_IOC_SETFLAGS -#define REISERFS_IOC_GETVERSION EXT2_IOC_GETVERSION -#define REISERFS_IOC_SETVERSION EXT2_IOC_SETVERSION +#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS +#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS +#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION +#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION + +/* the 32 bit compat definitions with int argument */ +#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) +#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION +#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION /* Locking primitives */ /* Right now we are still falling back to (un)lock_kernel, but eventually that diff --git a/include/linux/sched.h b/include/linux/sched.h index a06fc89cf6e5..fc4a9873ec10 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -710,7 +710,6 @@ extern unsigned int max_cache_size; struct io_context; /* See blkdev.h */ -void exit_io_context(void); struct cpuset; #define NGROUPS_SMALL 32 diff --git a/include/linux/tty.h b/include/linux/tty.h index ea4c2605f8da..44091c0db0b4 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -307,6 +307,9 @@ extern void tty_ldisc_put(int); extern void tty_wakeup(struct tty_struct *tty); extern void tty_ldisc_flush(struct tty_struct *tty); +extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg); + extern struct mutex tty_mutex; /* n_tty.c */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9d4074ecd0cd..4f4d98addb44 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -111,6 +111,8 @@ balance_dirty_pages_ratelimited(struct address_space *mapping) } int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); +extern int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index d04d05adfa9b..c247a28259bc 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -6,7 +6,6 @@ #include #include - #define MSG_SIMPLE_TAG 0x20 #define MSG_HEAD_TAG 0x21 #define MSG_ORDERED_TAG 0x22 @@ -14,6 +13,7 @@ #define SCSI_NO_TAG (-1) /* identify no tag in use */ +#ifdef CONFIG_BLOCK /** * scsi_get_tag_type - get the type of tag the device supports @@ -100,7 +100,7 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER) + if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) *msg++ = MSG_ORDERED_TAG; else *msg++ = MSG_SIMPLE_TAG; @@ -144,4 +144,5 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth) return shost->bqt ? 0 : -ENOMEM; } +#endif /* CONFIG_BLOCK */ #endif /* _SCSI_SCSI_TCQ_H */ diff --git a/init/Kconfig b/init/Kconfig index 4381006dd666..d2eb7a84a264 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -92,7 +92,7 @@ config LOCALVERSION_AUTO config SWAP bool "Support for paging of anonymous memory (swap)" - depends on MMU + depends on MMU && BLOCK default y help This option allows you to choose whether you want to have support diff --git a/init/do_mounts.c b/init/do_mounts.c index b290aadb1d3f..dc1ec0803ef9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -285,7 +285,11 @@ void __init mount_block_root(char *name, int flags) { char *fs_names = __getname(); char *p; +#ifdef CONFIG_BLOCK char b[BDEVNAME_SIZE]; +#else + const char *b = name; +#endif get_fs_names(fs_names); retry: @@ -304,7 +308,9 @@ retry: * Allow the user to distinguish between failed sys_open * and bad superblock on root device. */ +#ifdef CONFIG_BLOCK __bdevname(ROOT_DEV, b); +#endif printk("VFS: Cannot open root device \"%s\" or %s\n", root_device_name, b); printk("Please append a correct \"root=\" boot option\n"); @@ -316,7 +322,10 @@ retry: for (p = fs_names; *p; p += strlen(p)+1) printk(" %s", p); printk("\n"); - panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b)); +#ifdef CONFIG_BLOCK + __bdevname(ROOT_DEV, b); +#endif + panic("VFS: Unable to mount root fs on %s", b); out: putname(fs_names); } @@ -387,8 +396,10 @@ void __init mount_root(void) change_floppy("root floppy"); } #endif +#ifdef CONFIG_BLOCK create_dev("/dev/root", ROOT_DEV); mount_block_root("/dev/root", root_mountflags); +#endif } /* diff --git a/kernel/compat.c b/kernel/compat.c index 75573e5d27b0..b4fbd838cd77 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -26,6 +26,8 @@ #include +extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); + int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) { return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) || diff --git a/kernel/exit.c b/kernel/exit.c index 2e4c13cba95a..c189de2927ab 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -38,6 +38,7 @@ #include #include /* for audit_free() */ #include +#include #include #include diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 6991bece67e8..7a3b2e75f040 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -134,3 +134,8 @@ cond_syscall(sys_madvise); cond_syscall(sys_mremap); cond_syscall(sys_remap_file_pages); cond_syscall(compat_sys_move_pages); + +/* block-layer dependent */ +cond_syscall(sys_bdflush); +cond_syscall(sys_ioprio_set); +cond_syscall(sys_ioprio_get); diff --git a/lib/rbtree.c b/lib/rbtree.c index 1e55ba1c2edf..48499c2d88cc 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -322,6 +322,9 @@ struct rb_node *rb_next(struct rb_node *node) { struct rb_node *parent; + if (rb_parent(node) == node) + return NULL; + /* If we have a right-hand child, go down and then left as far as we can. */ if (node->rb_right) { @@ -348,6 +351,9 @@ struct rb_node *rb_prev(struct rb_node *node) { struct rb_node *parent; + if (rb_parent(node) == node) + return NULL; + /* If we have a left-hand child, go down and then right as far as we can. */ if (node->rb_left) { diff --git a/mm/Makefile b/mm/Makefile index 6200c6d6afd2..12b3a4eee88d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -12,6 +12,9 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o \ prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) +ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) +obj-y += bounce.o +endif obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o diff --git a/mm/bounce.c b/mm/bounce.c new file mode 100644 index 000000000000..e4b62d2a4024 --- /dev/null +++ b/mm/bounce.c @@ -0,0 +1,302 @@ +/* bounce buffer handling for block devices + * + * - Split from highmem.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define POOL_SIZE 64 +#define ISA_POOL_SIZE 16 + +static mempool_t *page_pool, *isa_page_pool; + +#ifdef CONFIG_HIGHMEM +static __init int init_emergency_pool(void) +{ + struct sysinfo i; + si_meminfo(&i); + si_swapinfo(&i); + + if (!i.totalhigh) + return 0; + + page_pool = mempool_create_page_pool(POOL_SIZE, 0); + BUG_ON(!page_pool); + printk("highmem bounce pool size: %d pages\n", POOL_SIZE); + + return 0; +} + +__initcall(init_emergency_pool); + +/* + * highmem version, map in to vec + */ +static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) +{ + unsigned long flags; + unsigned char *vto; + + local_irq_save(flags); + vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); + memcpy(vto + to->bv_offset, vfrom, to->bv_len); + kunmap_atomic(vto, KM_BOUNCE_READ); + local_irq_restore(flags); +} + +#else /* CONFIG_HIGHMEM */ + +#define bounce_copy_vec(to, vfrom) \ + memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) + +#endif /* CONFIG_HIGHMEM */ + +/* + * allocate pages in the DMA region for the ISA pool + */ +static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) +{ + return mempool_alloc_pages(gfp_mask | GFP_DMA, data); +} + +/* + * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA + * as the max address, so check if the pool has already been created. + */ +int init_emergency_isa_pool(void) +{ + if (isa_page_pool) + return 0; + + isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa, + mempool_free_pages, (void *) 0); + BUG_ON(!isa_page_pool); + + printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE); + return 0; +} + +/* + * Simple bounce buffer support for highmem pages. Depending on the + * queue gfp mask set, *to may or may not be a highmem page. kmap it + * always, it will do the Right Thing + */ +static void copy_to_high_bio_irq(struct bio *to, struct bio *from) +{ + unsigned char *vfrom; + struct bio_vec *tovec, *fromvec; + int i; + + __bio_for_each_segment(tovec, to, i, 0) { + fromvec = from->bi_io_vec + i; + + /* + * not bounced + */ + if (tovec->bv_page == fromvec->bv_page) + continue; + + /* + * fromvec->bv_offset and fromvec->bv_len might have been + * modified by the block layer, so use the original copy, + * bounce_copy_vec already uses tovec->bv_len + */ + vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; + + flush_dcache_page(tovec->bv_page); + bounce_copy_vec(tovec, vfrom); + } +} + +static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) +{ + struct bio *bio_orig = bio->bi_private; + struct bio_vec *bvec, *org_vec; + int i; + + if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) + set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); + + /* + * free up bounce indirect pages used + */ + __bio_for_each_segment(bvec, bio, i, 0) { + org_vec = bio_orig->bi_io_vec + i; + if (bvec->bv_page == org_vec->bv_page) + continue; + + dec_zone_page_state(bvec->bv_page, NR_BOUNCE); + mempool_free(bvec->bv_page, pool); + } + + bio_endio(bio_orig, bio_orig->bi_size, err); + bio_put(bio); +} + +static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + bounce_end_io(bio, page_pool, err); + return 0; +} + +static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + bounce_end_io(bio, isa_page_pool, err); + return 0; +} + +static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) +{ + struct bio *bio_orig = bio->bi_private; + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + copy_to_high_bio_irq(bio_orig, bio); + + bounce_end_io(bio, pool, err); +} + +static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + __bounce_end_io_read(bio, page_pool, err); + return 0; +} + +static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + __bounce_end_io_read(bio, isa_page_pool, err); + return 0; +} + +static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig, + mempool_t *pool) +{ + struct page *page; + struct bio *bio = NULL; + int i, rw = bio_data_dir(*bio_orig); + struct bio_vec *to, *from; + + bio_for_each_segment(from, *bio_orig, i) { + page = from->bv_page; + + /* + * is destination page below bounce pfn? + */ + if (page_to_pfn(page) < q->bounce_pfn) + continue; + + /* + * irk, bounce it + */ + if (!bio) + bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); + + to = bio->bi_io_vec + i; + + to->bv_page = mempool_alloc(pool, q->bounce_gfp); + to->bv_len = from->bv_len; + to->bv_offset = from->bv_offset; + inc_zone_page_state(to->bv_page, NR_BOUNCE); + + if (rw == WRITE) { + char *vto, *vfrom; + + flush_dcache_page(from->bv_page); + vto = page_address(to->bv_page) + to->bv_offset; + vfrom = kmap(from->bv_page) + from->bv_offset; + memcpy(vto, vfrom, to->bv_len); + kunmap(from->bv_page); + } + } + + /* + * no pages bounced + */ + if (!bio) + return; + + /* + * at least one page was bounced, fill in possible non-highmem + * pages + */ + __bio_for_each_segment(from, *bio_orig, i, 0) { + to = bio_iovec_idx(bio, i); + if (!to->bv_page) { + to->bv_page = from->bv_page; + to->bv_len = from->bv_len; + to->bv_offset = from->bv_offset; + } + } + + bio->bi_bdev = (*bio_orig)->bi_bdev; + bio->bi_flags |= (1 << BIO_BOUNCED); + bio->bi_sector = (*bio_orig)->bi_sector; + bio->bi_rw = (*bio_orig)->bi_rw; + + bio->bi_vcnt = (*bio_orig)->bi_vcnt; + bio->bi_idx = (*bio_orig)->bi_idx; + bio->bi_size = (*bio_orig)->bi_size; + + if (pool == page_pool) { + bio->bi_end_io = bounce_end_io_write; + if (rw == READ) + bio->bi_end_io = bounce_end_io_read; + } else { + bio->bi_end_io = bounce_end_io_write_isa; + if (rw == READ) + bio->bi_end_io = bounce_end_io_read_isa; + } + + bio->bi_private = *bio_orig; + *bio_orig = bio; +} + +void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig) +{ + mempool_t *pool; + + /* + * for non-isa bounce case, just check if the bounce pfn is equal + * to or bigger than the highest pfn in the system -- in that case, + * don't waste time iterating over bio segments + */ + if (!(q->bounce_gfp & GFP_DMA)) { + if (q->bounce_pfn >= blk_max_pfn) + return; + pool = page_pool; + } else { + BUG_ON(!isa_page_pool); + pool = isa_page_pool; + } + + blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); + + /* + * slow path + */ + __blk_queue_bounce(q, bio_orig, pool); +} + +EXPORT_SYMBOL(blk_queue_bounce); diff --git a/mm/filemap.c b/mm/filemap.c index 3277f3b23524..c4fe97f5ace0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) *count = inode->i_sb->s_maxbytes - *pos; } else { +#ifdef CONFIG_BLOCK loff_t isize; if (bdev_read_only(I_BDEV(inode))) return -EPERM; @@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (*pos + *count > isize) *count = isize - *pos; +#else + return -EPERM; +#endif } return 0; } @@ -2491,3 +2495,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, } return retval; } + +/** + * try_to_release_page() - release old fs-specific metadata on a page + * + * @page: the page which the kernel is trying to free + * @gfp_mask: memory allocation flags (and I/O mode) + * + * The address_space is to try to release any data against the page + * (presumably at page->private). If the release was successful, return `1'. + * Otherwise return zero. + * + * The @gfp_mask argument specifies whether I/O may be performed to release + * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). + * + * NOTE: @gfp_mask may go away, and this function may become non-blocking. + */ +int try_to_release_page(struct page *page, gfp_t gfp_mask) +{ + struct address_space * const mapping = page->mapping; + + BUG_ON(!PageLocked(page)); + if (PageWriteback(page)) + return 0; + + if (mapping && mapping->a_ops->releasepage) + return mapping->a_ops->releasepage(page, gfp_mask); + return try_to_free_buffers(page); +} + +EXPORT_SYMBOL(try_to_release_page); diff --git a/mm/highmem.c b/mm/highmem.c index ee5519b176ee..0206e7e5018c 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -29,13 +29,6 @@ #include #include -static mempool_t *page_pool, *isa_page_pool; - -static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) -{ - return mempool_alloc_pages(gfp_mask | GFP_DMA, data); -} - /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped @@ -217,282 +210,8 @@ void fastcall kunmap_high(struct page *page) } EXPORT_SYMBOL(kunmap_high); - -#define POOL_SIZE 64 - -static __init int init_emergency_pool(void) -{ - struct sysinfo i; - si_meminfo(&i); - si_swapinfo(&i); - - if (!i.totalhigh) - return 0; - - page_pool = mempool_create_page_pool(POOL_SIZE, 0); - BUG_ON(!page_pool); - printk("highmem bounce pool size: %d pages\n", POOL_SIZE); - - return 0; -} - -__initcall(init_emergency_pool); - -/* - * highmem version, map in to vec - */ -static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) -{ - unsigned long flags; - unsigned char *vto; - - local_irq_save(flags); - vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); - memcpy(vto + to->bv_offset, vfrom, to->bv_len); - kunmap_atomic(vto, KM_BOUNCE_READ); - local_irq_restore(flags); -} - -#else /* CONFIG_HIGHMEM */ - -#define bounce_copy_vec(to, vfrom) \ - memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) - #endif -#define ISA_POOL_SIZE 16 - -/* - * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA - * as the max address, so check if the pool has already been created. - */ -int init_emergency_isa_pool(void) -{ - if (isa_page_pool) - return 0; - - isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa, - mempool_free_pages, (void *) 0); - BUG_ON(!isa_page_pool); - - printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE); - return 0; -} - -/* - * Simple bounce buffer support for highmem pages. Depending on the - * queue gfp mask set, *to may or may not be a highmem page. kmap it - * always, it will do the Right Thing - */ -static void copy_to_high_bio_irq(struct bio *to, struct bio *from) -{ - unsigned char *vfrom; - struct bio_vec *tovec, *fromvec; - int i; - - __bio_for_each_segment(tovec, to, i, 0) { - fromvec = from->bi_io_vec + i; - - /* - * not bounced - */ - if (tovec->bv_page == fromvec->bv_page) - continue; - - /* - * fromvec->bv_offset and fromvec->bv_len might have been - * modified by the block layer, so use the original copy, - * bounce_copy_vec already uses tovec->bv_len - */ - vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; - - flush_dcache_page(tovec->bv_page); - bounce_copy_vec(tovec, vfrom); - } -} - -static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) -{ - struct bio *bio_orig = bio->bi_private; - struct bio_vec *bvec, *org_vec; - int i; - - if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) - set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); - - /* - * free up bounce indirect pages used - */ - __bio_for_each_segment(bvec, bio, i, 0) { - org_vec = bio_orig->bi_io_vec + i; - if (bvec->bv_page == org_vec->bv_page) - continue; - - dec_zone_page_state(bvec->bv_page, NR_BOUNCE); - mempool_free(bvec->bv_page, pool); - } - - bio_endio(bio_orig, bio_orig->bi_size, err); - bio_put(bio); -} - -static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - bounce_end_io(bio, page_pool, err); - return 0; -} - -static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - bounce_end_io(bio, isa_page_pool, err); - return 0; -} - -static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) -{ - struct bio *bio_orig = bio->bi_private; - - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - copy_to_high_bio_irq(bio_orig, bio); - - bounce_end_io(bio, pool, err); -} - -static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - __bounce_end_io_read(bio, page_pool, err); - return 0; -} - -static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - __bounce_end_io_read(bio, isa_page_pool, err); - return 0; -} - -static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig, - mempool_t *pool) -{ - struct page *page; - struct bio *bio = NULL; - int i, rw = bio_data_dir(*bio_orig); - struct bio_vec *to, *from; - - bio_for_each_segment(from, *bio_orig, i) { - page = from->bv_page; - - /* - * is destination page below bounce pfn? - */ - if (page_to_pfn(page) < q->bounce_pfn) - continue; - - /* - * irk, bounce it - */ - if (!bio) - bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); - - to = bio->bi_io_vec + i; - - to->bv_page = mempool_alloc(pool, q->bounce_gfp); - to->bv_len = from->bv_len; - to->bv_offset = from->bv_offset; - inc_zone_page_state(to->bv_page, NR_BOUNCE); - - if (rw == WRITE) { - char *vto, *vfrom; - - flush_dcache_page(from->bv_page); - vto = page_address(to->bv_page) + to->bv_offset; - vfrom = kmap(from->bv_page) + from->bv_offset; - memcpy(vto, vfrom, to->bv_len); - kunmap(from->bv_page); - } - } - - /* - * no pages bounced - */ - if (!bio) - return; - - /* - * at least one page was bounced, fill in possible non-highmem - * pages - */ - __bio_for_each_segment(from, *bio_orig, i, 0) { - to = bio_iovec_idx(bio, i); - if (!to->bv_page) { - to->bv_page = from->bv_page; - to->bv_len = from->bv_len; - to->bv_offset = from->bv_offset; - } - } - - bio->bi_bdev = (*bio_orig)->bi_bdev; - bio->bi_flags |= (1 << BIO_BOUNCED); - bio->bi_sector = (*bio_orig)->bi_sector; - bio->bi_rw = (*bio_orig)->bi_rw; - - bio->bi_vcnt = (*bio_orig)->bi_vcnt; - bio->bi_idx = (*bio_orig)->bi_idx; - bio->bi_size = (*bio_orig)->bi_size; - - if (pool == page_pool) { - bio->bi_end_io = bounce_end_io_write; - if (rw == READ) - bio->bi_end_io = bounce_end_io_read; - } else { - bio->bi_end_io = bounce_end_io_write_isa; - if (rw == READ) - bio->bi_end_io = bounce_end_io_read_isa; - } - - bio->bi_private = *bio_orig; - *bio_orig = bio; -} - -void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig) -{ - mempool_t *pool; - - /* - * for non-isa bounce case, just check if the bounce pfn is equal - * to or bigger than the highest pfn in the system -- in that case, - * don't waste time iterating over bio segments - */ - if (!(q->bounce_gfp & GFP_DMA)) { - if (q->bounce_pfn >= blk_max_pfn) - return; - pool = page_pool; - } else { - BUG_ON(!isa_page_pool); - pool = isa_page_pool; - } - - blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); - - /* - * slow path - */ - __blk_queue_bounce(q, bio_orig, pool); -} - -EXPORT_SYMBOL(blk_queue_bounce); - #if defined(HASHED_PAGE_VIRTUAL) #define PA_HASH_ORDER 7 diff --git a/mm/migrate.c b/mm/migrate.c index 20a8c2687b1e..ba2453f9483d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping, } EXPORT_SYMBOL(migrate_page); +#ifdef CONFIG_BLOCK /* * Migration function for pages with buffers. This function can only be used * if the underlying filesystem guarantees that no other references to "page" @@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping, return 0; } EXPORT_SYMBOL(buffer_migrate_page); +#endif /* * Writeback a page to clean the dirty state @@ -525,7 +527,7 @@ static int fallback_migrate_page(struct address_space *mapping, * Buffers may be managed in a filesystem specific way. * We must have no buffers or drop them. */ - if (page_has_buffers(page) && + if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 488b7088557c..c0d4ce144dec 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -550,6 +552,139 @@ void __init page_writeback_init(void) register_cpu_notifier(&ratelimit_nb); } +/** + * generic_writepages - walk the list of dirty pages of the given + * address space and writepage() all of them. + * + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * + * This is a library function, which implements the writepages() + * address_space_operation. + * + * If a page is already under I/O, generic_writepages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + * + * Derived from mpage_writepages() - if you fix this you should check that + * also! + */ +int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + int (*writepage)(struct page *page, struct writeback_control *wbc); + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + writepage = mapping->a_ops->writepage; + + /* deal with chardevs and other special file */ + if (!writepage) + return 0; + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc); + if (ret) { + if (ret == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); + } + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + unlock_page(page); + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + +EXPORT_SYMBOL(generic_writepages); + int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; @@ -672,9 +807,11 @@ int fastcall set_page_dirty(struct page *page) if (likely(mapping)) { int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; - if (spd) - return (*spd)(page); - return __set_page_dirty_buffers(page); +#ifdef CONFIG_BLOCK + if (!spd) + spd = __set_page_dirty_buffers; +#endif + return (*spd)(page); } if (!PageDirty(page)) { if (!TestSetPageDirty(page)) diff --git a/mm/truncate.c b/mm/truncate.c index a654928323dc..8fde6580657e 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -17,6 +17,32 @@ do_invalidatepage */ +/** + * do_invalidatepage - invalidate part of all of a page + * @page: the page which is affected + * @offset: the index of the truncation point + * + * do_invalidatepage() is called when all or part of the page has become + * invalidated by a truncate operation. + * + * do_invalidatepage() does not have to release all buffers, but it must + * ensure that no dirty buffer is left outside @offset and that no I/O + * is underway against any of the blocks which are outside the truncation + * point. Because the caller is about to free (and possibly reuse) those + * blocks on-disk. + */ +void do_invalidatepage(struct page *page, unsigned long offset) +{ + void (*invalidatepage)(struct page *, unsigned long); + invalidatepage = page->mapping->a_ops->invalidatepage; +#ifdef CONFIG_BLOCK + if (!invalidatepage) + invalidatepage = block_invalidatepage; +#endif + if (invalidatepage) + (*invalidatepage)(page, offset); +} + static inline void truncate_partial_page(struct page *page, unsigned partial) { memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);