From da02eb72f150bef1d281d93b3e4716ce374c4510 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Thu, 23 Aug 2012 17:28:57 +0200 Subject: [PATCH 1/6] reiserfs: Make reiserfs_xattr_handlers static Silences the following sparse warning: fs/reiserfs/xattr.c:899:28: warning: symbol 'reiserfs_xattr_handlers' was not declared. Should it be static? Signed-off-by: Sachin Kamat Signed-off-by: Jan Kara --- fs/reiserfs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d319963aeb11..c196369fe408 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -896,7 +896,7 @@ static int create_privroot(struct dentry *dentry) { return 0; } #endif /* Actual operations that are exported to VFS-land */ -const struct xattr_handler *reiserfs_xattr_handlers[] = { +static const struct xattr_handler *reiserfs_xattr_handlers[] = { #ifdef CONFIG_REISERFS_FS_XATTR &reiserfs_xattr_user_handler, &reiserfs_xattr_trusted_handler, From e4230108547766f4e105a62c45f3724309106f91 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 27 Aug 2012 14:30:40 -0500 Subject: [PATCH 2/6] ext3: don't clear orphan list on ro mount with errors When we have a filesystem with an orphan inode list *and* in error state, things behave differently if: 1) e2fsck -p is done prior to mount: e2fsck fixes things and exits happily (barring other significant problems) vs. 2) mount is done first, then e2fsck -p: due to the orphan inode list removal, more errors are found and e2fsck exits with UNEXPECTED INCONSISTENCY. The 2nd case above, on the root filesystem, has the tendency to halt the boot process, which is unfortunate. The situation can be improved by not clearing the orphan inode list when the fs is mounted readonly. Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c892e93d8e7..d90ad0574705 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1479,10 +1479,12 @@ static void ext3_orphan_cleanup (struct super_block * sb, } if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { - if (es->s_last_orphan) + /* don't clear list on RO mount w/ errors */ + if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { jbd_debug(1, "Errors on filesystem, " "clearing orphan list.\n"); - es->s_last_orphan = 0; + es->s_last_orphan = 0; + } jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); return; } From 378b8e1ad18e7c97832aa3771e295153c4cd2a55 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 31 Aug 2012 12:49:07 -0400 Subject: [PATCH 3/6] udf: add writepages support for udf Use mpage_writepages() instead of multiple calls to udf_writepage() to make performance higher. *Write Speed with writepage() = RecSize ReadSpeed WriteSpeed RanReadSpeed RanWriteSpeed 10485760 0.00MB/sec 8.56MB/sec 0.00MB/sec 8.20MB/sec 1048576 0.00MB/sec 8.57MB/sec 0.00MB/sec 6.42MB/sec 524288 0.00MB/sec 8.59MB/sec 0.00MB/sec 5.24MB/sec 262144 0.00MB/sec 8.59MB/sec 0.00MB/sec 4.17MB/sec 131072 0.00MB/sec 8.53MB/sec 0.00MB/sec 3.32MB/sec 65536 0.00MB/sec 8.49MB/sec 0.00MB/sec 2.31MB/sec *Write Speed with writepages() RecSize ReadSpeed WriteSpeed RanReadSpeed RanWriteSpeed 10485760 0.00MB/sec 9.88MB/sec 0.00MB/sec 9.60MB/sec 1048576 0.00MB/sec 9.95MB/sec 0.00MB/sec 7.52MB/sec 524288 0.00MB/sec 9.98MB/sec 0.00MB/sec 6.16MB/sec 262144 0.00MB/sec 9.90MB/sec 0.00MB/sec 4.98MB/sec 131072 0.00MB/sec 9.89MB/sec 0.00MB/sec 3.78MB/sec 65536 0.00MB/sec 9.81MB/sec 0.00MB/sec 2.50MB/sec There is about 1.4MB/sec speed improvement over 8.5MB/sec, which comes out around 16% improvement. Signed-off-by: Namjae Jeon Signed-off-by: Ashish Sangwan Signed-off-by: Jan Kara --- fs/udf/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index aa233469b3c1..1a0588e77a2f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -100,6 +100,12 @@ static int udf_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, udf_get_block, wbc); } +static int udf_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, udf_get_block); +} + static int udf_readpage(struct file *file, struct page *page) { return mpage_readpage(page, udf_get_block); @@ -145,6 +151,7 @@ const struct address_space_operations udf_aops = { .readpage = udf_readpage, .readpages = udf_readpages, .writepage = udf_writepage, + .writepages = udf_writepages, .write_begin = udf_write_begin, .write_end = generic_write_end, .bmap = udf_bmap, From 76f59b3bf5af41c2fb60f05f5d5ec51e8138e996 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 4 Sep 2012 13:42:00 +0530 Subject: [PATCH 4/6] ext3: Replace 0 with NULL for pointer in super.c file Fixes the following sparse warning: fs/ext3/super.c:983:45: warning: Using plain integer as NULL pointer Signed-off-by: Sachin Kamat Signed-off-by: Jan Kara --- fs/ext3/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index d90ad0574705..504fb3f51420 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -975,7 +975,7 @@ static int parse_options (char *options, struct super_block *sb, * Initialize args struct so we know whether arg was * found; some options take optional arguments. */ - args[0].to = args[0].from = 0; + args[0].to = args[0].from = NULL; token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: From 5eec54fcde7e065eb3d8a6e70e61d90673ca706b Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 5 Sep 2012 17:44:31 +0100 Subject: [PATCH 5/6] UDF: Add support for O_DIRECT Add support for the O_DIRECT flag. There are two cases to deal with: 1. Small files stored in the ICB (inode control block?): just return 0 from the new udf_adinicb_direct_IO() handler to fall back to buffered I/O. 2. Larger files, not stored in the ICB: nothing special here. Just call blockdev_direct_IO() from our new udf_direct_IO() handler and tidy up any blocks instantiated outside i_size on error. This is pretty standard. Factor error handling code out of udf_write_begin() into new function udf_write_failed() so it can also be called by udf_direct_IO(). Also change the whitespace in udf_aops to make it a bit neater. Signed-off-by: Ian Abbott Signed-off-by: Jan Kara --- arch/um/os-Linux/time.c | 2 +- fs/udf/file.c | 9 ++++++++ fs/udf/inode.c | 50 ++++++++++++++++++++++++++++------------- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index f60238559af3..0748fe0c8a73 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -114,7 +114,7 @@ static void deliver_alarm(void) skew += this_tick - last_tick; while (skew >= one_tick) { - alarm_handler(SIGVTALRM, NULL); + alarm_handler(SIGVTALRM, NULL, NULL); skew -= one_tick; } diff --git a/fs/udf/file.c b/fs/udf/file.c index d1c6093fd3d3..77b5953eaac8 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -118,11 +118,20 @@ static int udf_adinicb_write_end(struct file *file, return simple_write_end(file, mapping, pos, len, copied, page, fsdata); } +static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + /* Fallback to buffered I/O. */ + return 0; +} + const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .write_begin = udf_adinicb_write_begin, .write_end = udf_adinicb_write_end, + .direct_IO = udf_adinicb_direct_IO, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1a0588e77a2f..41d58309d6a8 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -95,6 +95,22 @@ void udf_evict_inode(struct inode *inode) } } +static void udf_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + struct udf_inode_info *iinfo = UDF_I(inode); + loff_t isize = inode->i_size; + + if (to > isize) { + truncate_pagecache(inode, to, isize); + if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { + down_write(&iinfo->i_data_sem); + udf_truncate_extents(inode); + up_write(&iinfo->i_data_sem); + } + } +} + static int udf_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, udf_get_block, wbc); @@ -124,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, int ret; ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); - if (unlikely(ret)) { - struct inode *inode = mapping->host; - struct udf_inode_info *iinfo = UDF_I(inode); - loff_t isize = inode->i_size; + if (unlikely(ret)) + udf_write_failed(mapping, pos + len); + return ret; +} - if (pos + len > isize) { - truncate_pagecache(inode, pos + len, isize); - if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { - down_write(&iinfo->i_data_sem); - udf_truncate_extents(inode); - up_write(&iinfo->i_data_sem); - } - } - } +static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + udf_get_block); + if (unlikely(ret < 0 && (rw & WRITE))) + udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); return ret; } @@ -152,8 +171,9 @@ const struct address_space_operations udf_aops = { .readpages = udf_readpages, .writepage = udf_writepage, .writepages = udf_writepages, - .write_begin = udf_write_begin, - .write_end = generic_write_end, + .write_begin = udf_write_begin, + .write_end = generic_write_end, + .direct_IO = udf_direct_IO, .bmap = udf_bmap, }; From 09e05d4805e6c524c1af74e524e5d0528bb3fef3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Jul 2012 23:16:25 +0200 Subject: [PATCH 6/6] jbd: Fix assertion failure in commit code due to lacking transaction credits ext3 users of data=journal mode with blocksize < pagesize were occasionally hitting assertion failure in journal_commit_transaction() checking whether the transaction has at least as many credits reserved as buffers attached. The core of the problem is that when a file gets truncated, buffers that still need checkpointing or that are attached to the committing transaction are left with buffer_mapped set. When this happens to buffers beyond i_size attached to a page stradding i_size, subsequent write extending the file will see these buffers and as they are mapped (but underlying blocks were freed) things go awry from here. The assertion failure just coincidentally (and in this case luckily as we would start corrupting filesystem) triggers due to journal_head not being properly cleaned up as well. Under some rare circumstances this bug could even hit data=ordered mode users. There the assertion won't trigger and we would end up corrupting the filesystem. We fix the problem by unmapping buffers if possible (in lots of cases we just need a buffer attached to a transaction as a place holder but it must not be written out anyway). And in one case, we just have to bite the bullet and wait for transaction commit to finish. Reviewed-by: Josef Bacik Signed-off-by: Jan Kara --- fs/jbd/commit.c | 45 +++++++++++++++++++++++-------- fs/jbd/transaction.c | 64 ++++++++++++++++++++++++++++++-------------- 2 files changed, 78 insertions(+), 31 deletions(-) diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 52c15c776029..86b39b167c23 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -86,7 +86,12 @@ nope: static void release_data_buffer(struct buffer_head *bh) { if (buffer_freed(bh)) { + WARN_ON_ONCE(buffer_dirty(bh)); clear_buffer_freed(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; release_buffer_page(bh); } else put_bh(bh); @@ -866,17 +871,35 @@ restart_loop: * there's no point in keeping a checkpoint record for * it. */ - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future after the "add to orphan" - * operation been committed, That's not only a performance - * gain, it also stops aliasing problems if the buffer is - * left behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh) && !jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + /* + * A buffer which has been freed while still being journaled by + * a previous transaction. + */ + if (buffer_freed(bh)) { + /* + * If the running transaction is the one containing + * "add to orphan" operation (b_next_transaction != + * NULL), we have to wait for that transaction to + * commit before we can really get rid of the buffer. + * So just clear b_modified to not confuse transaction + * credit accounting and refile the buffer to + * BJ_Forget of the running transaction. If the just + * committed transaction contains "add to orphan" + * operation, we can completely invalidate the buffer + * now. We are rather throughout in that since the + * buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial + * page. + */ + jh->b_modified = 0; + if (!jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index febc10db5ced..78b7f84241d4 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1843,15 +1843,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * We're outside-transaction here. Either or both of j_running_transaction * and j_committing_transaction may be NULL. */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + int partial_page) { transaction_t *transaction; struct journal_head *jh; int may_free = 1; - int ret; BUFFER_TRACE(bh, "entry"); +retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1879,10 +1880,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * clear the buffer dirty bit at latest at the moment when the * transaction marking the buffer as freed in the filesystem * structures is committed because from that moment on the - * buffer can be reallocated and used by a different page. + * block can be reallocated and used by a different page. * Since the block hasn't been freed yet but the inode has * already been added to orphan list, it is safe for us to add * the buffer to BJ_Forget list of the newest transaction. + * + * Also we have to clear buffer_mapped flag of a truncated buffer + * because the buffer_head may be attached to the page straddling + * i_size (can happen only when blocksize < pagesize) and thus the + * buffer_head can be reused when the file is extended again. So we end + * up keeping around invalidated buffers attached to transactions' + * BJ_Forget list just to stop checkpointing code from cleaning up + * the transaction this buffer was modified in. */ transaction = jh->b_transaction; if (transaction == NULL) { @@ -1909,13 +1918,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * committed, the buffer won't be needed any * longer. */ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_running_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* There is no currently-running transaction. So the * orphan record which we wrote for this file must have @@ -1923,13 +1928,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * the committing transaction, if it exists. */ if (journal->j_committing_transaction) { JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_committing_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* The orphan record's transaction has * committed. We can cleanse this buffer */ @@ -1950,10 +1951,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } /* * The buffer is committing, we simply cannot touch - * it. So we just set j_next_transaction to the - * running transaction (if there is one) and mark - * buffer as freed so that commit code knows it should - * clear dirty bits when it is done with the buffer. + * it. If the page is straddling i_size we have to wait + * for commit and try again. + */ + if (partial_page) { + tid_t tid = journal->j_committing_transaction->t_tid; + + journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + log_wait_commit(journal, tid); + goto retry; + } + /* + * OK, buffer won't be reachable after truncate. We just set + * j_next_transaction to the running transaction (if there is + * one) and mark buffer as freed so that commit code knows it + * should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) @@ -1976,6 +1991,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: + /* + * This is tricky. Although the buffer is truncated, it may be reused + * if blocksize < pagesize and it is attached to the page straddling + * EOF. Since the buffer might have been added to BJ_Forget list of the + * running transaction, journal_get_write_access() won't clear + * b_modified and credit accounting gets confused. So clear b_modified + * here. */ + jh->b_modified = 0; journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); @@ -2024,7 +2047,8 @@ void journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); + may_free &= journal_unmap_buffer(journal, bh, + offset > 0); unlock_buffer(bh); } curr_off = next_off;