From 9a7f143ab529352ebef13d3f0f4a09f13efa9435 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Fri, 15 Nov 2013 10:42:51 +0900 Subject: [PATCH 01/95] f2fs: introduce __find_rev_next(_zero)_bit When f2fs_set_bit is used, in a byte MSB and LSB is reversed, in that case we can use __find_rev_next_bit or __find_rev_next_zero_bit. Signed-off-by: Changman Lee [Jaegeuk Kim: change the function names] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa284d397199..aa1d30d76719 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -20,6 +20,154 @@ #include "node.h" #include +#define __reverse_ffz(x) __reverse_ffs(~(x)) + +/* + * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since + * MSB and LSB are reversed in a byte by f2fs_set_bit. + */ +static inline unsigned long __reverse_ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf0) == 0) + num += 4; + else + word >>= 4; + if ((word & 0xc) == 0) + num += 2; + else + word >>= 2; + if ((word & 0x2) == 0) + num += 1; + return num; +} + +/* + * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue + * f2fs_set_bit makes MSB and LSB reversed in a byte. + * Example: + * LSB <--> MSB + * f2fs_set_bit(0, bitmap) => 0000 0001 + * f2fs_set_bit(7, bitmap) => 1000 0000 + */ +static unsigned long __find_rev_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + const unsigned long *p = addr + BIT_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + unsigned long mask, submask; + unsigned long quot, rest; + + if (offset >= size) + return size; + + size -= result; + offset %= BITS_PER_LONG; + if (!offset) + goto aligned; + + tmp = *(p++); + quot = (offset >> 3) << 3; + rest = offset & 0x7; + mask = ~0UL << quot; + submask = (unsigned char)(0xff << rest) >> rest; + submask <<= quot; + mask &= submask; + tmp &= mask; + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + + size -= BITS_PER_LONG; + result += BITS_PER_LONG; +aligned: + while (size & ~(BITS_PER_LONG-1)) { + tmp = *(p++); + if (tmp) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __reverse_ffs(tmp); +} + +static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + const unsigned long *p = addr + BIT_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + unsigned long mask, submask; + unsigned long quot, rest; + + if (offset >= size) + return size; + + size -= result; + offset %= BITS_PER_LONG; + if (!offset) + goto aligned; + + tmp = *(p++); + quot = (offset >> 3) << 3; + rest = offset & 0x7; + mask = ~(~0UL << quot); + submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest); + submask <<= quot; + mask += submask; + tmp |= mask; + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + + size -= BITS_PER_LONG; + result += BITS_PER_LONG; +aligned: + while (size & ~(BITS_PER_LONG - 1)) { + tmp = *(p++); + if (~tmp) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + __reverse_ffz(tmp); +} + /* * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. From e81c93cf8c7bd413898798cf8c67f18b1fef3360 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Fri, 15 Nov 2013 13:21:16 +0900 Subject: [PATCH 02/95] f2fs: improve searching speed of __next_free_blkoff To find a zero bit using the result of OR operation between ckpt_valid_map and cur_valid_map is more fast than find a zero bit in each bitmap. Signed-off-by: Changman Lee [Jaegeuk Kim: adjust changed function name] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index aa1d30d76719..67f1e5bbdac9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -607,13 +607,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi, struct curseg_info *seg, block_t start) { struct seg_entry *se = get_seg_entry(sbi, seg->segno); - block_t ofs; - for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { - if (!f2fs_test_bit(ofs, se->ckpt_valid_map) - && !f2fs_test_bit(ofs, se->cur_valid_map)) - break; - } - seg->next_blkoff = ofs; + int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); + unsigned long target_map[entries]; + unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *cur_map = (unsigned long *)se->cur_valid_map; + int i, pos; + + for (i = 0; i < entries; i++) + target_map[i] = ckpt_map[i] | cur_map[i]; + + pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); + + seg->next_blkoff = pos; } /* From 7fd9e544fbb10c6ae4b4953f6063560c8eeae6e8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 Nov 2013 13:55:58 +0900 Subject: [PATCH 03/95] f2fs: add a slab cache entry for small discards This patch adds a slab cache entry for small discards. Each entry consists of: struct discard_entry { struct list_head list; /* list head */ block_t blkaddr; /* block address to be discarded */ int len; /* # of consecutive blocks of the discard */ }; Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 14 ++++++++++++++ fs/f2fs/segment.c | 20 ++++++++++++++++++++ fs/f2fs/super.c | 7 ++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 89dc7508faf2..c73e3dff6f13 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -97,6 +97,13 @@ struct dir_inode_entry { struct inode *inode; /* vfs inode pointer */ }; +/* for the list of blockaddresses to be discarded */ +struct discard_entry { + struct list_head list; /* list head */ + block_t blkaddr; /* block address to be discarded */ + int len; /* # of consecutive blocks of the discard */ +}; + /* for the list of fsync inodes, used only during recovery */ struct fsync_inode_entry { struct list_head list; /* list head */ @@ -308,6 +315,11 @@ struct f2fs_sm_info { /* a threshold to reclaim prefree segments */ unsigned int rec_prefree_segments; + + /* for small discard management */ + struct list_head discard_list; /* 4KB discard list */ + int nr_discards; /* # of discards in the list */ + int max_discards; /* max. discards to be issued */ }; /* @@ -1079,6 +1091,8 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *, void flush_sit_entries(struct f2fs_sb_info *); int build_segment_manager(struct f2fs_sb_info *); void destroy_segment_manager(struct f2fs_sb_info *); +int __init create_segment_manager_caches(void); +void destroy_segment_manager_caches(void); /* * checkpoint.c diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 67f1e5bbdac9..823526ec5243 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -22,6 +22,8 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) +static struct kmem_cache *discard_entry_slab; + /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since * MSB and LSB are reversed in a byte by f2fs_set_bit. @@ -1798,6 +1800,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; + INIT_LIST_HEAD(&sm_info->discard_list); + sm_info->nr_discards = 0; + sm_info->max_discards = 0; + err = build_sit_info(sbi); if (err) return err; @@ -1913,3 +1919,17 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) sbi->sm_info = NULL; kfree(sm_info); } + +int __init create_segment_manager_caches(void) +{ + discard_entry_slab = f2fs_kmem_cache_create("discard_entry", + sizeof(struct discard_entry), NULL); + if (!discard_entry_slab) + return -ENOMEM; + return 0; +} + +void destroy_segment_manager_caches(void) +{ + kmem_cache_destroy(discard_entry_slab); +} diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bafff72de8e8..e9aa3f79f425 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1078,9 +1078,12 @@ static int __init init_f2fs_fs(void) err = create_node_manager_caches(); if (err) goto free_inodecache; - err = create_gc_caches(); + err = create_segment_manager_caches(); if (err) goto free_node_manager_caches; + err = create_gc_caches(); + if (err) + goto free_segment_manager_caches; err = create_checkpoint_caches(); if (err) goto free_gc_caches; @@ -1102,6 +1105,8 @@ free_checkpoint_caches: destroy_checkpoint_caches(); free_gc_caches: destroy_gc_caches(); +free_segment_manager_caches: + destroy_segment_manager_caches(); free_node_manager_caches: destroy_node_manager_caches(); free_inodecache: From b29555505d81e496fdbd125190c8043f3c09a83c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Nov 2013 14:49:56 +0900 Subject: [PATCH 04/95] f2fs: add key functions for small discards This patch adds key functions to activate the small discard feature. Note that this procedure is conducted during the checkpoint only. In flush_sit_entries(), when a new dirty sit entry is flushed, f2fs calls add_discard_addrs() which searches candidates to be discarded. The candidates should be marked *invalidated* and also previous checkpoint recognizes it as *valid*. At the end of a checkpoint procedure, f2fs throws discards based on the discard entry list. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 823526ec5243..505a8894cfa1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -266,6 +266,47 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +static void add_discard_addrs(struct f2fs_sb_info *sbi, + unsigned int segno, struct seg_entry *se) +{ + struct list_head *head = &SM_I(sbi)->discard_list; + struct discard_entry *new; + int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); + int max_blocks = sbi->blocks_per_seg; + unsigned long *cur_map = (unsigned long *)se->cur_valid_map; + unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long dmap[entries]; + unsigned int start = 0, end = -1; + int i; + + if (!test_opt(sbi, DISCARD)) + return; + + /* zero block will be discarded through the prefree list */ + if (!se->valid_blocks || se->valid_blocks == max_blocks) + return; + + /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ + for (i = 0; i < entries; i++) + dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + + while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { + start = __find_rev_next_bit(dmap, max_blocks, end + 1); + if (start >= max_blocks) + break; + + end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, segno) + start; + new->len = end - start; + + list_add_tail(&new->list, head); + SM_I(sbi)->nr_discards += end - start; + } +} + /* * Should call clear_prefree_segments after checkpoint is done. */ @@ -288,6 +329,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi) { + struct list_head *head = &(SM_I(sbi)->discard_list); + struct list_head *this, *next; + struct discard_entry *entry; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int total_segs = TOTAL_SEGS(sbi); @@ -318,6 +362,18 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) GFP_NOFS, 0); } mutex_unlock(&dirty_i->seglist_lock); + + /* send small discards */ + list_for_each_safe(this, next, head) { + entry = list_entry(this, struct discard_entry, list); + blkdev_issue_discard(sbi->sb->s_bdev, + entry->blkaddr << sbi->log_sectors_per_block, + (1 << sbi->log_sectors_per_block) * entry->len, + GFP_NOFS, 0); + list_del(&entry->list); + SM_I(sbi)->nr_discards -= entry->len; + kmem_cache_free(discard_entry_slab, entry); + } } static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -1469,6 +1525,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi) sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + /* add discard candidates */ + if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) + add_discard_addrs(sbi, segno, se); + if (flushed) goto to_sit_page; From 7ac8c3b051ed3a3e71df72a7ffc44e14cc5c2bae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Nov 2013 15:00:38 +0900 Subject: [PATCH 05/95] f2fs: add a sysfs entry to control max_discards If frequent small discards are issued to the device, the performance would be degraded significantly. So, this patch adds a sysfs entry to control the number of discards to be issued during a checkpoint procedure. By default, f2fs does not issue any small discards, which means max_discards is zero. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e9aa3f79f425..a022412c66ea 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -175,6 +175,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -183,6 +184,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_no_gc_sleep_time), ATTR_LIST(gc_idle), ATTR_LIST(reclaim_segments), + ATTR_LIST(max_small_discards), NULL, }; From 3720887910864467a61cd0d64bad3965009cdef8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Nov 2013 16:55:17 +0900 Subject: [PATCH 06/95] f2fs: introduce f2fs_issue_discard() to clean up Change log from v1: o fix 32bit drops reported by Dan Carpenter This patch adds f2fs_issue_discard() to clean up blkdev_issue_discard() flows. Dan carpenter reported: "block_t is a 32 bit type and sector_t is a 64 bit type. The upper 32 bits of the sector_t are not used because the shift will wrap." Bug-Reported-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 505a8894cfa1..c51fa4bee60b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -266,6 +266,14 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +static void f2fs_issue_discard(struct f2fs_sb_info *sbi, + block_t blkstart, block_t blklen) +{ + sector_t start = ((sector_t)blkstart) << sbi->log_sectors_per_block; + sector_t len = ((sector_t)blklen) << sbi->log_sectors_per_block; + blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); +} + static void add_discard_addrs(struct f2fs_sb_info *sbi, unsigned int segno, struct seg_entry *se) { @@ -354,22 +362,15 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) if (!test_opt(sbi, DISCARD)) continue; - blkdev_issue_discard(sbi->sb->s_bdev, - START_BLOCK(sbi, start) << - sbi->log_sectors_per_block, - (1 << (sbi->log_sectors_per_block + - sbi->log_blocks_per_seg)) * (end - start), - GFP_NOFS, 0); + f2fs_issue_discard(sbi, START_BLOCK(sbi, start), + (end - start) << sbi->log_blocks_per_seg); } mutex_unlock(&dirty_i->seglist_lock); /* send small discards */ list_for_each_safe(this, next, head) { entry = list_entry(this, struct discard_entry, list); - blkdev_issue_discard(sbi->sb->s_bdev, - entry->blkaddr << sbi->log_sectors_per_block, - (1 << sbi->log_sectors_per_block) * entry->len, - GFP_NOFS, 0); + f2fs_issue_discard(sbi, entry->blkaddr, entry->len); list_del(&entry->list); SM_I(sbi)->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); From 1661d07c2d5e6486cab1c189cfb65ff60abf9b92 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Nov 2013 17:01:00 +0900 Subject: [PATCH 07/95] f2fs: add a tracepoint for f2fs_issue_discard This patch adds a tracepoint for f2fs_issue_discard. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + include/trace/events/f2fs.h | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c51fa4bee60b..cfc0eb4f24d5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -272,6 +272,7 @@ static void f2fs_issue_discard(struct f2fs_sb_info *sbi, sector_t start = ((sector_t)blkstart) << sbi->log_sectors_per_block; sector_t len = ((sector_t)blklen) << sbi->log_sectors_per_block; blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); + trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); } static void add_discard_addrs(struct f2fs_sb_info *sbi, diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index e0dc355fa317..47ee70de7fed 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -727,6 +727,29 @@ TRACE_EVENT(f2fs_write_checkpoint, __entry->msg) ); +TRACE_EVENT(f2fs_issue_discard, + + TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen), + + TP_ARGS(sb, blkstart, blklen), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(block_t, blkstart) + __field(block_t, blklen) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->blkstart = blkstart; + __entry->blklen = blklen; + ), + + TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx", + show_dev(__entry), + (unsigned long long)__entry->blkstart, + (unsigned long long)__entry->blklen) +); #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ From 75c3c8bc88d573d6823fcf2576f5c47e4fc41710 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 16 Nov 2013 14:15:59 +0800 Subject: [PATCH 08/95] f2fs: use f2fs_put_page to release page for uniform style We should use f2fs_put_page to release page for uniform style of f2fs code. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa3438c571fa..076a60cb8dbb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -714,8 +714,7 @@ static int f2fs_write_end(struct file *file, update_inode_page(inode); } - unlock_page(page); - page_cache_release(page); + f2fs_put_page(page, 1); return copied; } From 7d5e510944ce60ef0c6c2300a58547679df76db7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 18 Nov 2013 17:13:35 +0900 Subject: [PATCH 09/95] f2fs: clean up the do_submit_bio flow This patch introduces PAGE_TYPE_OF_BIO() and cleans up do_submit_bio() with it. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 39 +++++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c73e3dff6f13..38963b4b500a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -350,6 +350,7 @@ enum count_type { * with waiting the bio's completion * ... Only can be used with META. */ +#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type)) enum page_type { DATA, NODE, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cfc0eb4f24d5..d5043bdaa1af 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -835,32 +835,35 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) { int rw = sync ? WRITE_SYNC : WRITE; - enum page_type btype = type > META ? META : type; + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct bio *bio = sbi->bio[btype]; + struct bio_private *p; + + if (!bio) + return; + + sbi->bio[btype] = NULL; if (type >= META_FLUSH) rw = WRITE_FLUSH_FUA; - if (btype == META) rw |= REQ_META; - if (sbi->bio[btype]) { - struct bio_private *p = sbi->bio[btype]->bi_private; - p->sbi = sbi; - sbi->bio[btype]->bi_end_io = f2fs_end_io_write; + p = bio->bi_private; + p->sbi = sbi; + bio->bi_end_io = f2fs_end_io_write; - trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); + trace_f2fs_do_submit_bio(sbi->sb, btype, sync, bio); - if (type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - p->is_sync = true; - p->wait = &wait; - submit_bio(rw, sbi->bio[btype]); - wait_for_completion(&wait); - } else { - p->is_sync = false; - submit_bio(rw, sbi->bio[btype]); - } - sbi->bio[btype] = NULL; + if (type == META_FLUSH) { + DECLARE_COMPLETION_ONSTACK(wait); + p->is_sync = true; + p->wait = &wait; + submit_bio(rw, bio); + wait_for_completion(&wait); + } else { + p->is_sync = false; + submit_bio(rw, bio); } } From 971767caf632190f77a40b4011c19948232eed75 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 18 Nov 2013 17:16:17 +0900 Subject: [PATCH 10/95] f2fs: use sbi->write_mutex for write bios This patch removes an unnecessary semaphore (i.e., sbi->bio_sem). There is no reason to use the semaphore when f2fs submits read and write IOs. Instead, let's use a write mutex and cover the sbi->bio[] by the lock. Change log from v1: o split write_mutex suggested by Chao Yu Chao described, "All DATA/NODE/META bio buffers in superblock is protected by 'sbi->write_mutex', but each bio buffer area is independent, So we should split write_mutex to three for DATA/NODE/META." Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 13 +++++++++---- fs/f2fs/super.c | 6 +++++- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 076a60cb8dbb..5920639ca377 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -383,8 +383,6 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, trace_f2fs_readpage(page, blk_addr, type); - down_read(&sbi->bio_sem); - /* Allocate a new bio */ bio = f2fs_bio_alloc(bdev, 1); @@ -394,13 +392,11 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); - up_read(&sbi->bio_sem); f2fs_put_page(page, 1); return -EFAULT; } submit_bio(type, bio); - up_read(&sbi->bio_sem); return 0; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 38963b4b500a..c2de54919725 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -374,7 +374,7 @@ struct f2fs_sb_info { struct f2fs_sm_info *sm_info; /* segment manager */ struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ - struct rw_semaphore bio_sem; /* IO semaphore */ + struct mutex write_mutex[NR_PAGE_TYPE]; /* mutex for writing IOs */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d5043bdaa1af..68b8a2bdf6d8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -869,9 +869,14 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) { - down_write(&sbi->bio_sem); + enum page_type btype = PAGE_TYPE_OF_BIO(type); + + if (!sbi->bio[btype]) + return; + + mutex_lock(&sbi->write_mutex[btype]); do_submit_bio(sbi, type, sync); - up_write(&sbi->bio_sem); + mutex_unlock(&sbi->write_mutex[btype]); } static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, @@ -882,7 +887,7 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, verify_block_addr(sbi, blk_addr); - down_write(&sbi->bio_sem); + mutex_lock(&sbi->write_mutex[type]); inc_page_count(sbi, F2FS_WRITEBACK); @@ -917,7 +922,7 @@ retry: sbi->last_block_in_bio[type] = blk_addr; - up_write(&sbi->bio_sem); + mutex_unlock(&sbi->write_mutex[type]); trace_f2fs_submit_write_page(page, blk_addr, type); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a022412c66ea..e194578b3594 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -820,6 +820,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; + int i; /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); @@ -876,7 +877,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&sbi->node_write); sbi->por_doing = false; spin_lock_init(&sbi->stat_lock); - init_rwsem(&sbi->bio_sem); + + for (i = 0; i < NR_PAGE_TYPE; i++) + mutex_init(&sbi->write_mutex[i]); + init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); From c11abd1a8075e428ecf5303359513b48193b29cd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Nov 2013 10:41:54 +0900 Subject: [PATCH 11/95] f2fs: disable the extent cache ops on high fragmented files The f2fs manages an extent cache to search a number of consecutive data blocks very quickly. However it conducts unnecessary cache operations if the file is highly fragmented with no valid extent cache. In such the case, we don't need to handle the extent cache, but just can disable the cache facility. Nevertheless, this patch gives one more chance to enable the extent cache. For example, 1. create a file 2. write data sequentially which produces a large valid extent cache 3. update some data, resulting in a fragmented extent 4. if the fragmented extent is too small, then drop extent cache 5. close the file 6. open the file again 7. give another chance to make a new extent cache 8. write data sequentially again which creates another big extent cache. ... Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 22 ++++++++++++++++++---- fs/f2fs/f2fs.h | 3 +++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5920639ca377..2e54522a8061 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -71,6 +71,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, pgoff_t start_fofs, end_fofs; block_t start_blkaddr; + if (is_inode_flag_set(fi, FI_NO_EXTENT)) + return 0; + read_lock(&fi->ext.ext_lock); if (fi->ext.len == 0) { read_unlock(&fi->ext.ext_lock); @@ -109,6 +112,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) struct f2fs_inode_info *fi = F2FS_I(dn->inode); pgoff_t fofs, start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; + int need_update = true; f2fs_bug_on(blk_addr == NEW_ADDR); fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -117,6 +121,9 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) /* Update the page address in the parent node */ __set_data_blkaddr(dn, blk_addr); + if (is_inode_flag_set(fi, FI_NO_EXTENT)) + return; + write_lock(&fi->ext.ext_lock); start_fofs = fi->ext.fofs; @@ -163,14 +170,21 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) fofs - start_fofs + 1; fi->ext.len -= fofs - start_fofs + 1; } - goto end_update; + } else { + need_update = false; } - write_unlock(&fi->ext.ext_lock); - return; + /* Finally, if the extent is very fragmented, let's drop the cache. */ + if (fi->ext.len < F2FS_MIN_EXTENT_LEN) { + fi->ext.len = 0; + set_inode_flag(fi, FI_NO_EXTENT); + need_update = true; + } end_update: write_unlock(&fi->ext.ext_lock); - sync_inode_page(dn); + if (need_update) + sync_inode_page(dn); + return; } struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c2de54919725..e9038bbeee9f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -169,6 +169,8 @@ enum { #define F2FS_LINK_MAX 32000 /* maximum link count per file */ /* for in-memory extent cache entry */ +#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ + struct extent_info { rwlock_t ext_lock; /* rwlock for consistency */ unsigned int fofs; /* start offset in a file */ @@ -889,6 +891,7 @@ enum { FI_NO_ALLOC, /* should not allocate any blocks */ FI_UPDATE_DIR, /* should update inode block for consistency */ FI_DELAY_IPUT, /* used for the recovery */ + FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ }; From 1ff7bd3bb5f7f57bc7418ee6ed12f3ae217e4e9c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Nov 2013 12:47:22 +0900 Subject: [PATCH 12/95] f2fs: introduce a bio array for per-page write bios The f2fs has three bio types, NODE, DATA, and META, and manages some data structures per each bio types. The codes are a little bit messy, thus, this patch introduces a bio array which groups individual data structures as follows. struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ struct mutex io_mutex; /* mutex for bio */ }; struct f2fs_sb_info { ... struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ ... }; The code changes from this new data structure are trivial. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 +++++++++--- fs/f2fs/segment.c | 44 ++++++++++++++++++++++---------------------- fs/f2fs/super.c | 2 +- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e9038bbeee9f..05f8fe1399ac 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -361,6 +361,12 @@ enum page_type { META_FLUSH, }; +struct f2fs_bio_info { + struct bio *bio; /* bios to merge */ + sector_t last_block_in_bio; /* last block number */ + struct mutex io_mutex; /* mutex for bio */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -374,9 +380,9 @@ struct f2fs_sb_info { /* for segment-related operations */ struct f2fs_sm_info *sm_info; /* segment manager */ - struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ - sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ - struct mutex write_mutex[NR_PAGE_TYPE]; /* mutex for writing IOs */ + + /* for bio operations */ + struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 68b8a2bdf6d8..38c1a89f54cc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -836,65 +836,65 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, { int rw = sync ? WRITE_SYNC : WRITE; enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct bio *bio = sbi->bio[btype]; + struct f2fs_bio_info *io = &sbi->write_io[btype]; struct bio_private *p; - if (!bio) + if (!io->bio) return; - sbi->bio[btype] = NULL; - if (type >= META_FLUSH) rw = WRITE_FLUSH_FUA; if (btype == META) rw |= REQ_META; - p = bio->bi_private; + p = io->bio->bi_private; p->sbi = sbi; - bio->bi_end_io = f2fs_end_io_write; + io->bio->bi_end_io = f2fs_end_io_write; - trace_f2fs_do_submit_bio(sbi->sb, btype, sync, bio); + trace_f2fs_do_submit_bio(sbi->sb, btype, sync, io->bio); if (type == META_FLUSH) { DECLARE_COMPLETION_ONSTACK(wait); p->is_sync = true; p->wait = &wait; - submit_bio(rw, bio); + submit_bio(rw, io->bio); wait_for_completion(&wait); } else { p->is_sync = false; - submit_bio(rw, bio); + submit_bio(rw, io->bio); } + io->bio = NULL; } void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) { - enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io = &sbi->write_io[PAGE_TYPE_OF_BIO(type)]; - if (!sbi->bio[btype]) + if (!io->bio) return; - mutex_lock(&sbi->write_mutex[btype]); + mutex_lock(&io->io_mutex); do_submit_bio(sbi, type, sync); - mutex_unlock(&sbi->write_mutex[btype]); + mutex_unlock(&io->io_mutex); } static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, block_t blk_addr, enum page_type type) { struct block_device *bdev = sbi->sb->s_bdev; + struct f2fs_bio_info *io = &sbi->write_io[type]; int bio_blocks; verify_block_addr(sbi, blk_addr); - mutex_lock(&sbi->write_mutex[type]); + mutex_lock(&io->io_mutex); inc_page_count(sbi, F2FS_WRITEBACK); - if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) + if (io->bio && io->last_block_in_bio != blk_addr - 1) do_submit_bio(sbi, type, false); alloc_new: - if (sbi->bio[type] == NULL) { + if (io->bio == NULL) { struct bio_private *priv; retry: priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); @@ -904,9 +904,9 @@ retry: } bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks); - sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - sbi->bio[type]->bi_private = priv; + io->bio = f2fs_bio_alloc(bdev, bio_blocks); + io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + io->bio->bi_private = priv; /* * The end_io will be assigned at the sumbission phase. * Until then, let bio_add_page() merge consecutive IOs as much @@ -914,15 +914,15 @@ retry: */ } - if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < + if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { do_submit_bio(sbi, type, false); goto alloc_new; } - sbi->last_block_in_bio[type] = blk_addr; + io->last_block_in_bio = blk_addr; - mutex_unlock(&sbi->write_mutex[type]); + mutex_unlock(&io->io_mutex); trace_f2fs_submit_write_page(page, blk_addr, type); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e194578b3594..8c677e958deb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -879,7 +879,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) spin_lock_init(&sbi->stat_lock); for (i = 0; i < NR_PAGE_TYPE; i++) - mutex_init(&sbi->write_mutex[i]); + mutex_init(&sbi->write_io[i].io_mutex); init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); From 58e674d6ab8b23ff1e933a8b714b302786351bd7 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 19 Nov 2013 18:03:18 +0800 Subject: [PATCH 13/95] f2fs: convert remove_inode_page to void Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 05f8fe1399ac..707896456a42 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1049,7 +1049,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int truncate_xattr_node(struct inode *, struct page *); int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); -int remove_inode_page(struct inode *); +void remove_inode_page(struct inode *); struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4ac4150d421d..e7b6826beac4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -803,29 +803,25 @@ int truncate_xattr_node(struct inode *inode, struct page *page) * Caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). */ -int remove_inode_page(struct inode *inode) +void remove_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *page; nid_t ino = inode->i_ino; struct dnode_of_data dn; - int err; page = get_node_page(sbi, ino); if (IS_ERR(page)) - return PTR_ERR(page); + return; - err = truncate_xattr_node(inode, page); - if (err) { + if (truncate_xattr_node(inode, page)) { f2fs_put_page(page, 1); - return err; + return; } - /* 0 is possible, after f2fs_new_inode() is failed */ f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); set_new_dnode(&dn, inode, page, page, ino); truncate_node(&dn); - return 0; } struct page *new_inode_page(struct inode *inode, const struct qstr *name) From da19b0dc5080db4e9138d4f5bc47d5fa9ce81c98 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 19 Nov 2013 18:03:27 +0800 Subject: [PATCH 14/95] f2fs: convert dev_valid_block_count to void Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 707896456a42..154f07604c11 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -606,7 +606,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, return true; } -static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, +static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t count) { @@ -616,7 +616,6 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, inode->i_blocks -= count; sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); - return 0; } static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) From ef86d70994b57cf8095d436da76c6684c83ef0b2 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 19 Nov 2013 18:03:38 +0800 Subject: [PATCH 15/95] f2fs: convert inc/dec_valid_node_count to inc/dec one count Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 32 +++++++++++++++----------------- fs/f2fs/node.c | 6 +++--- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 154f07604c11..b665ce492e1f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -706,50 +706,48 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) } static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode, - unsigned int count) + struct inode *inode) { block_t valid_block_count; unsigned int valid_node_count; spin_lock(&sbi->stat_lock); - valid_block_count = sbi->total_valid_block_count + (block_t)count; - sbi->alloc_valid_block_count += (block_t)count; - valid_node_count = sbi->total_valid_node_count + count; - + valid_block_count = sbi->total_valid_block_count + 1; if (valid_block_count > sbi->user_block_count) { spin_unlock(&sbi->stat_lock); return false; } + valid_node_count = sbi->total_valid_node_count + 1; if (valid_node_count > sbi->total_node_count) { spin_unlock(&sbi->stat_lock); return false; } if (inode) - inode->i_blocks += count; - sbi->total_valid_node_count = valid_node_count; - sbi->total_valid_block_count = valid_block_count; + inode->i_blocks++; + + sbi->alloc_valid_block_count++; + sbi->total_valid_node_count++; + sbi->total_valid_block_count++; spin_unlock(&sbi->stat_lock); return true; } static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, - struct inode *inode, - unsigned int count) + struct inode *inode) { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi->total_valid_block_count < count); - f2fs_bug_on(sbi->total_valid_node_count < count); - f2fs_bug_on(inode->i_blocks < count); + f2fs_bug_on(!sbi->total_valid_block_count); + f2fs_bug_on(!sbi->total_valid_node_count); + f2fs_bug_on(!inode->i_blocks); - inode->i_blocks -= count; - sbi->total_valid_node_count -= count; - sbi->total_valid_block_count -= (block_t)count; + inode->i_blocks--; + sbi->total_valid_node_count--; + sbi->total_valid_block_count--; spin_unlock(&sbi->stat_lock); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e7b6826beac4..b843a5b3d5ee 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -502,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn) /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); - dec_valid_node_count(sbi, dn->inode, 1); + dec_valid_node_count(sbi, dn->inode); set_node_addr(sbi, &ni, NULL_ADDR); if (dn->nid == dn->inode->i_ino) { @@ -851,7 +851,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (!page) return ERR_PTR(-ENOMEM); - if (!inc_valid_node_count(sbi, dn->inode, 1)) { + if (!inc_valid_node_count(sbi, dn->inode)) { err = -ENOSPC; goto fail; } @@ -1560,7 +1560,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) new_ni = old_ni; new_ni.ino = ino; - if (!inc_valid_node_count(sbi, NULL, 1)) + if (!inc_valid_node_count(sbi, NULL)) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR); inc_valid_inode_count(sbi); From 502c6e0bcd95d45f734548e2fcf8a0de494a9095 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 19 Nov 2013 18:03:58 +0800 Subject: [PATCH 16/95] f2fs: simplify write_orphan_inodes for better readable Simplify write_orphan_inodes for better readable. Because we hold the orphan_inode_mutex, so it's safe to use list_for_each_entry instead of list_for_each_safe. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5716e5eb4e8e..247879f0e730 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -300,12 +300,13 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) { - struct list_head *head, *this, *next; + struct list_head *head; struct f2fs_orphan_block *orphan_blk = NULL; struct page *page = NULL; unsigned int nentries = 0; unsigned short index = 1; unsigned short orphan_blocks; + struct orphan_inode_entry *orphan = NULL; orphan_blocks = (unsigned short)((sbi->n_orphans + (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); @@ -314,12 +315,17 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) head = &sbi->orphan_inode_list; /* loop for each orphan inode entry and write them in Jornal block */ - list_for_each_safe(this, next, head) { - struct orphan_inode_entry *orphan; + list_for_each_entry(orphan, head, list) { + if (!page) { + page = grab_meta_page(sbi, start_blk); + orphan_blk = + (struct f2fs_orphan_block *)page_address(page); + memset(orphan_blk, 0, sizeof(*orphan_blk)); + } - orphan = list_entry(this, struct orphan_inode_entry, list); + orphan_blk->ino[nentries] = cpu_to_le32(orphan->ino); - if (nentries == F2FS_ORPHANS_PER_BLOCK) { + if (nentries++ == F2FS_ORPHANS_PER_BLOCK) { /* * an orphan block is full of 1020 entries, * then we need to flush current orphan blocks @@ -335,24 +341,16 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) nentries = 0; page = NULL; } - if (page) - goto page_exist; - - page = grab_meta_page(sbi, start_blk); - orphan_blk = (struct f2fs_orphan_block *)page_address(page); - memset(orphan_blk, 0, sizeof(*orphan_blk)); -page_exist: - orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); } - if (!page) - goto end; - orphan_blk->blk_addr = cpu_to_le16(index); - orphan_blk->blk_count = cpu_to_le16(orphan_blocks); - orphan_blk->entry_count = cpu_to_le32(nentries); - set_page_dirty(page); - f2fs_put_page(page, 1); -end: + if (page) { + orphan_blk->blk_addr = cpu_to_le16(index); + orphan_blk->blk_count = cpu_to_le16(orphan_blocks); + orphan_blk->entry_count = cpu_to_le32(nentries); + set_page_dirty(page); + f2fs_put_page(page, 1); + } + mutex_unlock(&sbi->orphan_inode_mutex); } From ce3b7d80edad7bc5ff347b9ff02f1484265b1f05 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 19 Nov 2013 18:03:47 +0800 Subject: [PATCH 17/95] f2fs: move the list_head initialization into the lock protection region Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 247879f0e730..eae8dc3e1c08 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -511,8 +511,8 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head = &sbi->dir_inode_list; - struct list_head *this; + + struct list_head *this, *head; if (!S_ISDIR(inode->i_mode)) return; @@ -523,6 +523,7 @@ void remove_dirty_dir_inode(struct inode *inode) return; } + head = &sbi->dir_inode_list; list_for_each(this, head) { struct dir_inode_entry *entry; entry = list_entry(this, struct dir_inode_entry, list); @@ -544,11 +545,13 @@ void remove_dirty_dir_inode(struct inode *inode) struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) { - struct list_head *head = &sbi->dir_inode_list; - struct list_head *this; + + struct list_head *this, *head; struct inode *inode = NULL; spin_lock(&sbi->dir_inode_lock); + + head = &sbi->dir_inode_list; list_for_each(this, head) { struct dir_inode_entry *entry; entry = list_entry(this, struct dir_inode_entry, list); @@ -563,11 +566,13 @@ struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { - struct list_head *head = &sbi->dir_inode_list; + struct list_head *head; struct dir_inode_entry *entry; struct inode *inode; retry: spin_lock(&sbi->dir_inode_lock); + + head = &sbi->dir_inode_list; if (list_empty(head)) { spin_unlock(&sbi->dir_inode_lock); return; From 924b720b589f91311657216c97edbb3337449270 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 20 Nov 2013 14:46:39 +0800 Subject: [PATCH 18/95] f2fs: add a new function to support for merging contiguous read For better read performance, we add a new function to support for merging contiguous read as the one for write. v1-->v2: o add declarations here as Gu Zheng suggested. o use new structure f2fs_bio_info introduced by Jaegeuk Kim. Signed-off-by: Chao Yu Acked-by: Gu Zheng --- fs/f2fs/data.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/super.c | 1 + 3 files changed, 54 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2e54522a8061..ce3cbd92585e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -414,6 +414,56 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, return 0; } +void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, int rw) +{ + struct f2fs_bio_info *io = &sbi->read_io; + + if (!io->bio) + return; + + mutex_lock(&io->io_mutex); + if (io->bio) { + submit_bio(rw, io->bio); + io->bio = NULL; + } + mutex_unlock(&io->io_mutex); +} + +void submit_read_page(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, int rw) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct f2fs_bio_info *io = &sbi->read_io; + int bio_blocks; + + verify_block_addr(sbi, blk_addr); + + mutex_lock(&io->io_mutex); + + if (io->bio && io->last_block_in_bio != blk_addr - 1) { + submit_bio(rw, io->bio); + io->bio = NULL; + } +alloc_new: + if (io->bio == NULL) { + bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + io->bio = f2fs_bio_alloc(bdev, bio_blocks); + io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + io->bio->bi_end_io = read_end_io; + } + + if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + submit_bio(rw, io->bio); + io->bio = NULL; + goto alloc_new; + } + + io->last_block_in_bio = blk_addr; + + mutex_unlock(&io->io_mutex); +} + /* * This function should be used by the data read flow only where it * does not check the "create" flag that indicates block allocation. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b665ce492e1f..72718ef0d56a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -382,6 +382,7 @@ struct f2fs_sb_info { struct f2fs_sm_info *sm_info; /* segment manager */ /* for bio operations */ + struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ /* for checkpoint */ @@ -1132,6 +1133,8 @@ struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); +void f2fs_submit_read_bio(struct f2fs_sb_info *, int); +void submit_read_page(struct f2fs_sb_info *, struct page *, block_t, int); int do_write_data_page(struct page *); /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8c677e958deb..9981b28744ed 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -878,6 +878,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->por_doing = false; spin_lock_init(&sbi->stat_lock); + mutex_init(&sbi->read_io.io_mutex); for (i = 0; i < NR_PAGE_TYPE; i++) mutex_init(&sbi->write_io[i].io_mutex); From 7107e0a9b131f46785b853388fb263306721a986 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 21 Nov 2013 13:54:23 +0900 Subject: [PATCH 19/95] f2fs: merge read IOs at ra_nat_pages() Change log from v1: o add mark_page_accessed() not to reclaim the nat pages. This patch changes the policy of submitting read bios at ra_nat_pages. Previously, f2fs submits small read bios with block plugging. But, with this patch, f2fs itself merges read bios first and then submits a large bio, which can reduce the bio handling overheads. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b843a5b3d5ee..b212599b0da4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -89,13 +89,10 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) { struct address_space *mapping = sbi->meta_inode->i_mapping; struct f2fs_nm_info *nm_i = NM_I(sbi); - struct blk_plug plug; struct page *page; pgoff_t index; int i; - blk_start_plug(&plug); - for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { if (nid >= nm_i->max_nid) nid = 0; @@ -105,15 +102,15 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) if (!page) continue; if (PageUptodate(page)) { + mark_page_accessed(page); f2fs_put_page(page, 1); continue; } - if (f2fs_readpage(sbi, page, index, READ)) - continue; - + submit_read_page(sbi, page, index, READ_SYNC); + mark_page_accessed(page); f2fs_put_page(page, 0); } - blk_finish_plug(&plug); + f2fs_submit_read_bio(sbi, READ_SYNC); } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) From 61ae45c8803262a96f3af200489344f339992291 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Thu, 21 Nov 2013 20:04:21 +0900 Subject: [PATCH 20/95] f2fs: simplify IS_DATASEG and IS_NODESEG macro It is not efficient comparing each segment type to find node or data. Signed-off-by: Changman Lee [Jaegeuk Kim: remove unnecessary white spaces] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 269f690b4e24..38f6196493ff 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -20,13 +20,8 @@ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) -#define IS_DATASEG(t) \ - ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ - (t == CURSEG_WARM_DATA)) - -#define IS_NODESEG(t) \ - ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ - (t == CURSEG_WARM_NODE)) +#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) +#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) #define IS_CURSEG(sbi, seg) \ ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ From 87b8872d5b4a8f9f61123ab913aff4f6047d8b53 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 20 Nov 2013 16:40:10 +0800 Subject: [PATCH 21/95] f2fs: adds a tracepoint for submit_read_page This patch adds a tracepoint for submit_read_page. Signed-off-by: Chao Yu [Jaegeuk Kim: integrate tracepoints of f2fs_submit_read(_write)_page] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/segment.c | 2 +- include/trace/events/f2fs.h | 25 +++++++++++++++++++++---- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ce3cbd92585e..cdb342eeb421 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -462,6 +462,7 @@ alloc_new: io->last_block_in_bio = blk_addr; mutex_unlock(&io->io_mutex); + trace_f2fs_submit_read_page(page, rw, META, blk_addr); } /* diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 38c1a89f54cc..d42426dc3706 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -923,7 +923,7 @@ retry: io->last_block_in_bio = blk_addr; mutex_unlock(&io->io_mutex); - trace_f2fs_submit_write_page(page, blk_addr, type); + trace_f2fs_submit_write_page(page, WRITE, type, blk_addr); } void f2fs_wait_on_page_writeback(struct page *page, diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 47ee70de7fed..73cc5f07d1ba 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -674,15 +674,16 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); -TRACE_EVENT(f2fs_submit_write_page, +DECLARE_EVENT_CLASS(f2fs_io_page, - TP_PROTO(struct page *page, block_t blk_addr, int type), + TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), - TP_ARGS(page, blk_addr, type), + TP_ARGS(page, rw, type, blk_addr), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) + __field(int, rw) __field(int, type) __field(pgoff_t, index) __field(block_t, block) @@ -691,18 +692,34 @@ TRACE_EVENT(f2fs_submit_write_page, TP_fast_assign( __entry->dev = page->mapping->host->i_sb->s_dev; __entry->ino = page->mapping->host->i_ino; + __entry->rw = rw; __entry->type = type; __entry->index = page->index; __entry->block = blk_addr; ), - TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, blkaddr = 0x%llx", show_dev_ino(__entry), + show_bio_type(__entry->rw), show_block_type(__entry->type), (unsigned long)__entry->index, (unsigned long long)__entry->block) ); +DEFINE_EVENT(f2fs_io_page, f2fs_submit_write_page, + + TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), + + TP_ARGS(page, rw, type, blk_addr) +); + +DEFINE_EVENT(f2fs_io_page, f2fs_submit_read_page, + + TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), + + TP_ARGS(page, rw, type, blk_addr) +); + TRACE_EVENT(f2fs_write_checkpoint, TP_PROTO(struct super_block *sb, bool is_umount, char *msg), From d4d288bc72c020d335868ce217695c4d5dfd74d0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 24 Nov 2013 12:36:42 +0900 Subject: [PATCH 22/95] f2fs: adds a tracepoint for f2fs_submit_read_bio This patch adds a tracepoint for f2fs_submit_read_bio. Signed-off-by: Chao Yu [Jaegeuk Kim: integrate tracepoints of f2fs_submit_read(_write)_bio] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/segment.c | 5 ++-- include/trace/events/f2fs.h | 49 ++++++++++++++++++++++++------------- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdb342eeb421..711722018b8e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -421,6 +421,8 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, int rw) if (!io->bio) return; + trace_f2fs_submit_read_bio(sbi->sb, rw, META, io->bio); + mutex_lock(&io->io_mutex); if (io->bio) { submit_bio(rw, io->bio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d42426dc3706..a1acaa025bde 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -844,6 +844,9 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) rw = WRITE_FLUSH_FUA; + + trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio); + if (btype == META) rw |= REQ_META; @@ -851,8 +854,6 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, p->sbi = sbi; io->bio->bi_end_io = f2fs_end_io_write; - trace_f2fs_do_submit_bio(sbi->sb, btype, sync, io->bio); - if (type == META_FLUSH) { DECLARE_COMPLETION_ONSTACK(wait); p->is_sync = true; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 73cc5f07d1ba..1f59f5db55e5 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -18,13 +18,14 @@ #define show_bio_type(type) \ __print_symbolic(type, \ - { READ, "READ" }, \ - { READA, "READAHEAD" }, \ - { READ_SYNC, "READ_SYNC" }, \ - { WRITE, "WRITE" }, \ - { WRITE_SYNC, "WRITE_SYNC" }, \ - { WRITE_FLUSH, "WRITE_FLUSH" }, \ - { WRITE_FUA, "WRITE_FUA" }) + { READ, "READ" }, \ + { READA, "READAHEAD" }, \ + { READ_SYNC, "READ_SYNC" }, \ + { WRITE, "WRITE" }, \ + { WRITE_SYNC, "WRITE_SYNC" }, \ + { WRITE_FLUSH, "WRITE_FLUSH" }, \ + { WRITE_FUA, "WRITE_FUA" }, \ + { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" }) #define show_data_type(type) \ __print_symbolic(type, \ @@ -598,36 +599,50 @@ TRACE_EVENT(f2fs_reserve_new_block, __entry->ofs_in_node) ); -TRACE_EVENT(f2fs_do_submit_bio, +DECLARE_EVENT_CLASS(f2fs__submit_bio, - TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), + TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), - TP_ARGS(sb, btype, sync, bio), + TP_ARGS(sb, rw, type, bio), TP_STRUCT__entry( __field(dev_t, dev) - __field(int, btype) - __field(bool, sync) + __field(int, rw) + __field(int, type) __field(sector_t, sector) __field(unsigned int, size) ), TP_fast_assign( __entry->dev = sb->s_dev; - __entry->btype = btype; - __entry->sync = sync; + __entry->rw = rw; + __entry->type = type; __entry->sector = bio->bi_sector; __entry->size = bio->bi_size; ), - TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", + TP_printk("dev = (%d,%d), %s, %s, sector = %lld, size = %u", show_dev(__entry), - show_block_type(__entry->btype), - __entry->sync ? "sync" : "no sync", + show_bio_type(__entry->rw), + show_block_type(__entry->type), (unsigned long long)__entry->sector, __entry->size) ); +DEFINE_EVENT(f2fs__submit_bio, f2fs_submit_write_bio, + + TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + + TP_ARGS(sb, rw, type, bio) +); + +DEFINE_EVENT(f2fs__submit_bio, f2fs_submit_read_bio, + + TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + + TP_ARGS(sb, rw, type, bio) +); + DECLARE_EVENT_CLASS(f2fs__page, TP_PROTO(struct page *page, int type), From 74de593af77b109f202c47e090c9e134c8882869 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 22 Nov 2013 09:09:59 +0800 Subject: [PATCH 23/95] f2fs: read contiguous sit entry pages by merging for mount performance Previously we read sit entries page one by one, this method lost the chance of reading contiguous page together. So we read pages as contiguous as possible for better mount performance. change log: o merge judgements/use 'Continue' or 'Break' instead of 'Goto' as Gu Zheng suggested. o add mark_page_accessed() before release page to delay VM reclaiming. o remove '*order' for simplification of function as Jaegeuk Kim suggested. Signed-off-by: Chao Yu [Jaegeuk Kim: fix a bug on the block address calculation] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 101 ++++++++++++++++++++++++++++++++++------------ fs/f2fs/segment.h | 2 + 2 files changed, 77 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a1acaa025bde..6dd1dc16a9d5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -1706,41 +1707,89 @@ static int build_curseg(struct f2fs_sb_info *sbi) return restore_curseg_summaries(sbi); } +static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages) +{ + struct address_space *mapping = sbi->meta_inode->i_mapping; + struct page *page; + block_t blk_addr, prev_blk_addr = 0; + int sit_blk_cnt = SIT_BLK_CNT(sbi); + int blkno = start; + + for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) { + + blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); + + if (blkno != start && prev_blk_addr + 1 != blk_addr) + break; + prev_blk_addr = blk_addr; +repeat: + page = grab_cache_page(mapping, blk_addr); + if (!page) { + cond_resched(); + goto repeat; + } + if (PageUptodate(page)) { + mark_page_accessed(page); + f2fs_put_page(page, 1); + continue; + } + + submit_read_page(sbi, page, blk_addr, READ_SYNC); + + mark_page_accessed(page); + f2fs_put_page(page, 0); + } + + f2fs_submit_read_bio(sbi, READ_SYNC); + return blkno - start; +} + static void build_sit_entries(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - unsigned int start; + int sit_blk_cnt = SIT_BLK_CNT(sbi); + unsigned int i, start, end; + unsigned int readed, start_blk = 0; + int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - for (start = 0; start < TOTAL_SEGS(sbi); start++) { - struct seg_entry *se = &sit_i->sentries[start]; - struct f2fs_sit_block *sit_blk; - struct f2fs_sit_entry sit; - struct page *page; - int i; + do { + readed = ra_sit_pages(sbi, start_blk, nrpages); - mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < sits_in_cursum(sum); i++) { - if (le32_to_cpu(segno_in_journal(sum, i)) == start) { - sit = sit_in_journal(sum, i); - mutex_unlock(&curseg->curseg_mutex); - goto got_it; + start = start_blk * sit_i->sents_per_block; + end = (start_blk + readed) * sit_i->sents_per_block; + + for (; start < end && start < TOTAL_SEGS(sbi); start++) { + struct seg_entry *se = &sit_i->sentries[start]; + struct f2fs_sit_block *sit_blk; + struct f2fs_sit_entry sit; + struct page *page; + + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < sits_in_cursum(sum); i++) { + if (le32_to_cpu(segno_in_journal(sum, i)) == start) { + sit = sit_in_journal(sum, i); + mutex_unlock(&curseg->curseg_mutex); + goto got_it; + } + } + mutex_unlock(&curseg->curseg_mutex); + + page = get_current_sit_page(sbi, start); + sit_blk = (struct f2fs_sit_block *)page_address(page); + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; + f2fs_put_page(page, 1); +got_it: + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + if (sbi->segs_per_sec > 1) { + struct sec_entry *e = get_sec_entry(sbi, start); + e->valid_blocks += se->valid_blocks; } } - mutex_unlock(&curseg->curseg_mutex); - page = get_current_sit_page(sbi, start); - sit_blk = (struct f2fs_sit_block *)page_address(page); - sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; - f2fs_put_page(page, 1); -got_it: - check_block_count(sbi, start, &sit); - seg_info_from_raw_sit(se, &sit); - if (sbi->segs_per_sec > 1) { - struct sec_entry *e = get_sec_entry(sbi, start); - e->valid_blocks += se->valid_blocks; - } - } + start_blk += readed; + } while (start_blk < sit_blk_cnt); } static void init_free_segmap(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 38f6196493ff..b84dd2396665 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -78,6 +78,8 @@ (segno / SIT_ENTRY_PER_BLOCK) #define START_SEGNO(sit_i, segno) \ (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define SIT_BLK_CNT(sbi) \ + ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) From 0daaad97dcdc5cf91d368894a0954704e9a2cf2d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 24 Nov 2013 13:50:35 +0900 Subject: [PATCH 24/95] f2fs: avoid lock debugging overhead If CONFIG_F2FS_CHECK_FS is unset, we don't need to add any debugging overhead. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 72718ef0d56a..417280aa0549 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -22,8 +22,10 @@ #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(condition) BUG_ON(condition) +#define f2fs_down_write(x, y) down_write_nest_lock(x, y) #else #define f2fs_bug_on(condition) +#define f2fs_down_write(x, y) down_write(x) #endif /* @@ -556,7 +558,7 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex); + f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) From b600965c43f9690eb481d0c19948e109b685bde7 Mon Sep 17 00:00:00 2001 From: Huajun Li Date: Sun, 10 Nov 2013 23:13:18 +0800 Subject: [PATCH 25/95] f2fs: add a new function: f2fs_reserve_block() Add the function f2fs_reserve_block() to easily reserve new blocks, and use it to clean up more codes. Signed-off-by: Huajun Li Signed-off-by: Haicheng Li Signed-off-by: Weihong Xu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 50 +++++++++++++++++++++++--------------------------- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 40 +++++++--------------------------------- 3 files changed, 31 insertions(+), 60 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 711722018b8e..2d02cf36d806 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -64,6 +64,22 @@ int reserve_new_block(struct dnode_of_data *dn) return 0; } +int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) +{ + bool need_put = dn->inode_page ? false : true; + int err; + + err = get_dnode_of_data(dn, index, ALLOC_NODE); + if (err) + return err; + if (dn->data_blkaddr == NULL_ADDR) + err = reserve_new_block(dn); + + if (need_put) + f2fs_put_dnode(dn); + return err; +} + static int check_extent_cache(struct inode *inode, pgoff_t pgofs, struct buffer_head *bh_result) { @@ -314,19 +330,10 @@ struct page *get_new_data_page(struct inode *inode, int err; set_new_dnode(&dn, inode, npage, npage, 0); - err = get_dnode_of_data(&dn, index, ALLOC_NODE); + err = f2fs_reserve_block(&dn, index); if (err) return ERR_PTR(err); - if (dn.data_blkaddr == NULL_ADDR) { - if (reserve_new_block(&dn)) { - if (!npage) - f2fs_put_dnode(&dn); - return ERR_PTR(-ENOSPC); - } - } - if (!npage) - f2fs_put_dnode(&dn); repeat: page = grab_cache_page(mapping, index); if (!page) @@ -707,21 +714,15 @@ repeat: *pagep = page; f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, index, ALLOC_NODE); - if (err) - goto err; - - if (dn.data_blkaddr == NULL_ADDR) - err = reserve_new_block(&dn); - - f2fs_put_dnode(&dn); - if (err) - goto err; - + err = f2fs_reserve_block(&dn, index); f2fs_unlock_op(sbi); + if (err) { + f2fs_put_page(page, 1); + return err; + } + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; @@ -754,11 +755,6 @@ out: SetPageUptodate(page); clear_cold_data(page); return 0; - -err: - f2fs_unlock_op(sbi); - f2fs_put_page(page, 1); - return err; } static int f2fs_write_end(struct file *file, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 417280aa0549..c8eb37e3b015 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1130,6 +1130,7 @@ void destroy_checkpoint_caches(void); * data.c */ int reserve_new_block(struct dnode_of_data *); +int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7d714f4972d5..1cd8e44b637f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -33,7 +33,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - block_t old_blk_addr; struct dnode_of_data dn; int err; @@ -44,24 +43,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); - if (err) { - f2fs_unlock_op(sbi); - goto out; - } - - old_blk_addr = dn.data_blkaddr; - - if (old_blk_addr == NULL_ADDR) { - err = reserve_new_block(&dn); - if (err) { - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - goto out; - } - } - f2fs_put_dnode(&dn); + err = f2fs_reserve_block(&dn, page->index); f2fs_unlock_op(sbi); + if (err) + goto out; file_update_time(vma->vm_file); lock_page(page); @@ -532,22 +517,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, index, ALLOC_NODE); - if (ret) { - f2fs_unlock_op(sbi); - break; - } - - if (dn.data_blkaddr == NULL_ADDR) { - ret = reserve_new_block(&dn); - if (ret) { - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - break; - } - } - f2fs_put_dnode(&dn); + ret = f2fs_reserve_block(&dn, index); f2fs_unlock_op(sbi); + if (ret) + break; + if (pg_start == pg_end) new_size = offset + len; From a709f4a2f22c0ebaed1d99aee63ab44ffc2ba3d0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 24 Nov 2013 14:42:23 +0900 Subject: [PATCH 26/95] f2fs: add detailed information of bio types in the tracepoints This patch inserts information of bio types in more detail. So, we can now see REQ_META and REQ_PRIO too. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- include/trace/events/f2fs.h | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6dd1dc16a9d5..33ab378df5bd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -846,11 +846,11 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) rw = WRITE_FLUSH_FUA; - trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio); - if (btype == META) rw |= REQ_META; + trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio); + p = io->bio->bi_private; p->sbi = sbi; io->bio->bi_end_io = f2fs_end_io_write; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1f59f5db55e5..204fcc3201b1 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -16,8 +16,13 @@ { META, "META" }, \ { META_FLUSH, "META_FLUSH" }) -#define show_bio_type(type) \ - __print_symbolic(type, \ +#define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA)) +#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) + +#define show_bio_type(type) show_bio_base(type), show_bio_extra(type) + +#define show_bio_base(type) \ + __print_symbolic(F2FS_BIO_MASK(type), \ { READ, "READ" }, \ { READA, "READAHEAD" }, \ { READ_SYNC, "READ_SYNC" }, \ @@ -27,6 +32,13 @@ { WRITE_FUA, "WRITE_FUA" }, \ { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" }) +#define show_bio_extra(type) \ + __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \ + { REQ_META, "(M)" }, \ + { REQ_PRIO, "(P)" }, \ + { REQ_META | REQ_PRIO, "(MP)" }, \ + { 0, " \b" }) + #define show_data_type(type) \ __print_symbolic(type, \ { CURSEG_HOT_DATA, "Hot DATA" }, \ @@ -447,7 +459,7 @@ TRACE_EVENT_CONDITION(f2fs_readpage, ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "blkaddr = 0x%llx, bio_type = %s", + "blkaddr = 0x%llx, bio_type = %s%s", show_dev_ino(__entry), (unsigned long)__entry->index, (unsigned long long)__entry->blkaddr, @@ -621,7 +633,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, __entry->size = bio->bi_size; ), - TP_printk("dev = (%d,%d), %s, %s, sector = %lld, size = %u", + TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u", show_dev(__entry), show_bio_type(__entry->rw), show_block_type(__entry->type), @@ -713,7 +725,7 @@ DECLARE_EVENT_CLASS(f2fs_io_page, __entry->block = blk_addr; ), - TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, blkaddr = 0x%llx", + TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx", show_dev_ino(__entry), show_bio_type(__entry->rw), show_block_type(__entry->type), From 03232305ff3cf44761f7ea271f7c9af5105392b9 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Sun, 24 Nov 2013 15:13:08 +0900 Subject: [PATCH 27/95] f2fs: send REQ_META or REQ_PRIO when reading meta area Let's send REQ_META or REQ_PRIO when reading meta area such as NAT/SIT etc. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index eae8dc3e1c08..7fe69ff2bfe7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -61,7 +61,7 @@ repeat: if (PageUptodate(page)) goto out; - if (f2fs_readpage(sbi, page, index, READ_SYNC)) + if (f2fs_readpage(sbi, page, index, READ_SYNC | REQ_META | REQ_PRIO)) goto repeat; lock_page(page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b212599b0da4..0fe9a9720b39 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -106,11 +106,11 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) f2fs_put_page(page, 1); continue; } - submit_read_page(sbi, page, index, READ_SYNC); + submit_read_page(sbi, page, index, READ_SYNC | REQ_META); mark_page_accessed(page); f2fs_put_page(page, 0); } - f2fs_submit_read_bio(sbi, READ_SYNC); + f2fs_submit_read_bio(sbi, READ_SYNC | REQ_META); } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 33ab378df5bd..1e8371392dcd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1734,13 +1734,13 @@ repeat: continue; } - submit_read_page(sbi, page, blk_addr, READ_SYNC); + submit_read_page(sbi, page, blk_addr, READ_SYNC | REQ_META); mark_page_accessed(page); f2fs_put_page(page, 0); } - f2fs_submit_read_bio(sbi, READ_SYNC); + f2fs_submit_read_bio(sbi, READ_SYNC | REQ_META); return blkno - start; } From 1001b3479ce96e37aed5e4fcdc3c60126e034d08 Mon Sep 17 00:00:00 2001 From: Huajun Li Date: Sun, 10 Nov 2013 23:13:16 +0800 Subject: [PATCH 28/95] f2fs: add flags and helpers to support inline data Add new inode flags F2FS_INLINE_DATA and FI_INLINE_DATA to indicate whether the inode has inline data. Inline data makes use of inode block's data indices region to save small file. Currently there are 923 data indices in an inode block. Since inline xattr has made use of the last 50 indices to save its data, there are 873 indices left which can be used for inline data. When FI_INLINE_DATA is set, the layout of inode block's indices region is like below: +-----------------+ | | Reserved. reserve_new_block() will make use of | i_addr[0] | i_addr[0] when we need to reserve a new data block | | to convert inline data into regular one's. |-----------------| | | Used by inline data. A file whose size is less than | i_addr[1~872] | 3488 bytes(~3.4k) and doesn't reserve extra | | blocks by fallocate() can be saved here. |-----------------| | | | i_addr[873~922] | Reserved for inline xattr | | +-----------------+ Signed-off-by: Haicheng Li Signed-off-by: Huajun Li Signed-off-by: Weihong Xu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 14 ++++++++++++++ include/linux/f2fs_fs.h | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c8eb37e3b015..72456ec3a3e2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -39,6 +39,7 @@ #define F2FS_MOUNT_POSIX_ACL 0x00000020 #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 +#define F2FS_MOUNT_INLINE_DATA 0x00000100 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -899,6 +900,7 @@ enum { FI_DELAY_IPUT, /* used for the recovery */ FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ + FI_INLINE_DATA, /* used for inline data*/ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -936,6 +938,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, { if (ri->i_inline & F2FS_INLINE_XATTR) set_inode_flag(fi, FI_INLINE_XATTR); + if (ri->i_inline & F2FS_INLINE_DATA) + set_inode_flag(fi, FI_INLINE_DATA); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -945,6 +949,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, if (is_inode_flag_set(fi, FI_INLINE_XATTR)) ri->i_inline |= F2FS_INLINE_XATTR; + if (is_inode_flag_set(fi, FI_INLINE_DATA)) + ri->i_inline |= F2FS_INLINE_DATA; } static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) @@ -970,6 +976,13 @@ static inline int inline_xattr_size(struct inode *inode) return 0; } +static inline void *inline_data_addr(struct page *page) +{ + struct f2fs_inode *ri; + ri = (struct f2fs_inode *)page_address(page); + return (void *)&(ri->i_addr[1]); +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -1265,4 +1278,5 @@ extern const struct address_space_operations f2fs_meta_aops; extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; + #endif diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index bb942f6d5702..aea5eedbb1ca 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -153,6 +153,14 @@ struct f2fs_extent { #define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ +#define F2FS_INLINE_DATA 0x02 /* file inline data flag */ + + +#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ + F2FS_INLINE_XATTR_ADDRS - 1)) + +#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \ + - sizeof(__le32)*(DEF_ADDRS_PER_INODE + 5 - 1)) struct f2fs_inode { __le16 i_mode; /* file mode */ From 8274de77b7072d983fe4b452b981b3e520f12698 Mon Sep 17 00:00:00 2001 From: Huajun Li Date: Sun, 10 Nov 2013 23:13:17 +0800 Subject: [PATCH 29/95] f2fs: add a new mount option: inline_data Add a mount option: inline_data. If the mount option is set, data of New created small files can be stored in their inode. Signed-off-by: Huajun Li Signed-off-by: Haicheng Li Signed-off-by: Weihong Xu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9981b28744ed..43f11cae408b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -50,6 +50,7 @@ enum { Opt_active_logs, Opt_disable_ext_identify, Opt_inline_xattr, + Opt_inline_data, Opt_err, }; @@ -65,6 +66,7 @@ static match_table_t f2fs_tokens = { {Opt_active_logs, "active_logs=%u"}, {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, + {Opt_inline_data, "inline_data"}, {Opt_err, NULL}, }; @@ -313,6 +315,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_disable_ext_identify: set_opt(sbi, DISABLE_EXT_IDENTIFY); break; + case Opt_inline_data: + set_opt(sbi, INLINE_DATA); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -510,7 +515,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) #endif if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) seq_puts(seq, ",disable_ext_identify"); - + if (test_opt(sbi, INLINE_DATA)) + seq_puts(seq, ",inline_data"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; From 0e80220ac554ea55fd867dede91f0054a13cf85c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 26 Nov 2013 16:36:20 +0900 Subject: [PATCH 30/95] f2fs: remove unnecessary return value Let's remove the unnecessary return value. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 72456ec3a3e2..1f1bc58d50ed 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -773,13 +773,12 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); } -static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) { spin_lock(&sbi->stat_lock); f2fs_bug_on(!sbi->total_valid_inode_count); sbi->total_valid_inode_count--; spin_unlock(&sbi->stat_lock); - return 0; } static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) From 36795567942a033ef9e22d8eba86396ffb9aa80c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 26 Nov 2013 16:44:16 +0800 Subject: [PATCH 31/95] f2fs: fix a potential out of range issue Fix a potential out of range issue introduced by commit: 22fb72225a f2fs: simplify write_orphan_inodes for better readable Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7fe69ff2bfe7..3e62987e333a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -323,9 +323,9 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) memset(orphan_blk, 0, sizeof(*orphan_blk)); } - orphan_blk->ino[nentries] = cpu_to_le32(orphan->ino); + orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); - if (nentries++ == F2FS_ORPHANS_PER_BLOCK) { + if (nentries == F2FS_ORPHANS_PER_BLOCK) { /* * an orphan block is full of 1020 entries, * then we need to flush current orphan blocks From f9a4e6df52edf8ce1040d1b8d340d31234a1bce3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 Nov 2013 12:44:05 +0900 Subject: [PATCH 32/95] f2fs: bug fix on bit overflow from 32bits to 64bits This patch fixes some bit overflows by the shift operations. Dan Carpenter reported potential bugs on bit overflows as follows. fs/f2fs/segment.c:910 submit_write_page() warn: should 'blk_addr << ((sbi)->log_blocksize - 9)' be a 64 bit type? fs/f2fs/checkpoint.c:429 get_valid_checkpoint() warn: should '1 << ()' be a 64 bit type? fs/f2fs/data.c:408 f2fs_readpage() warn: should 'blk_addr << ((sbi)->log_blocksize - 9)' be a 64 bit type? fs/f2fs/data.c:457 submit_read_page() warn: should 'blk_addr << ((sbi)->log_blocksize - 9)' be a 64 bit type? fs/f2fs/data.c:525 get_data_block_ro() warn: should 'i << blkbits' be a 64 bit type? Bug-Reported-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 ++- fs/f2fs/data.c | 2 +- fs/f2fs/segment.c | 4 ++-- fs/f2fs/segment.h | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3e62987e333a..21e721534965 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -426,7 +426,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version); /* The second checkpoint pack should start at the next segment */ - cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + cp_start_blk_no += ((unsigned long long)1) << + le32_to_cpu(fsb->log_blocks_per_seg); cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version); if (cp1 && cp2) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2d02cf36d806..24f752de6a90 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -525,7 +525,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, != (dn.data_blkaddr + i)) || maxblocks == i) break; map_bh(bh_result, inode->i_sb, dn.data_blkaddr); - bh_result->b_size = (i << blkbits); + bh_result->b_size = (((size_t)i) << blkbits); } f2fs_put_dnode(&dn); trace_f2fs_get_data_block(inode, iblock, bh_result, 0); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1e8371392dcd..03878634a0fe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -270,8 +270,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) static void f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { - sector_t start = ((sector_t)blkstart) << sbi->log_sectors_per_block; - sector_t len = ((sector_t)blklen) << sbi->log_sectors_per_block; + sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); + sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b84dd2396665..07887e1cc704 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -86,9 +86,9 @@ #define TOTAL_SECS(sbi) (sbi->total_sections) #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ - (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) + (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) #define SECTOR_TO_BLOCK(sbi, sectors) \ - (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) + (sectors >> (sbi)->log_sectors_per_block) #define MAX_BIO_BLOCKS(max_hw_blocks) \ (min((int)max_hw_blocks, BIO_MAX_PAGES)) From 031fa8cc9ba45c14f440b9cf71d09950fbe5eb9b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 Nov 2013 12:55:13 +0900 Subject: [PATCH 33/95] f2fs: remove unnecessary condition checks This patch removes the unnecessary condition checks on: fs/f2fs/gc.c:667 do_garbage_collect() warn: 'sum_page' isn't an ERR_PTR fs/f2fs/f2fs.h:795 f2fs_put_page() warn: 'page' isn't an ERR_PTR Reported-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1f1bc58d50ed..9a76c83937b1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -792,7 +792,7 @@ static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) static inline void f2fs_put_page(struct page *page, int unlock) { - if (!page || IS_ERR(page)) + if (!page) return; if (unlock) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b7ad1ec7e4cc..5fa54c1ca33b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -664,8 +664,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, /* read segment summary of victim */ sum_page = get_sum_page(sbi, segno); - if (IS_ERR(sum_page)) - return; blk_start_plug(&plug); From a66c7b2fcfbc9ef4e972f6bc2b63d72d00f23122 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 22 Nov 2013 16:52:50 +0800 Subject: [PATCH 34/95] f2fs: remove unneeded code in punch_hole Because FALLOC_FL_PUNCH_HOLE flag must be ORed with FALLOC_FL_KEEP_SIZE in fallocate, so we could remove the useless 'keep size' branch code which will never be excuted in punch_hole. Signed-off-by: Chao Yu Signed-off-by: Fan Li [Jaegeuk Kim: remove an unnecessary parameter togather] Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cd8e44b637f..2b47adcd852a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -444,7 +444,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) return 0; } -static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) +static int punch_hole(struct inode *inode, loff_t offset, loff_t len) { pgoff_t pg_start, pg_end; loff_t off_start, off_end; @@ -484,12 +484,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) } } - if (!(mode & FALLOC_FL_KEEP_SIZE) && - i_size_read(inode) <= (offset + len)) { - i_size_write(inode, offset); - mark_inode_dirty(inode); - } - return ret; } @@ -552,7 +546,7 @@ static long f2fs_fallocate(struct file *file, int mode, return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) - ret = punch_hole(inode, offset, len, mode); + ret = punch_hole(inode, offset, len); else ret = expand_inode_data(inode, offset, len, mode); From 6947eea957e4c4c873f92a4ee5da7a6ef0012718 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Nov 2013 16:37:00 +0800 Subject: [PATCH 35/95] f2fs: avoid to calculate incorrect max orphan number Because we will write node summaries when do_checkpoint with umount flag, our number of max orphan blocks should minus NR_CURSEG_NODE_TYPE additional. Signed-off-by: Chao Yu Signed-off-by: Shu Tan Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 21e721534965..c7e0572fdede 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -190,12 +190,13 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) int err = 0; /* - * considering 512 blocks in a segment 5 blocks are needed for cp + * considering 512 blocks in a segment 8 blocks are needed for cp * and log segment summaries. Remaining blocks are used to keep * orphan entries with the limitation one reserved segment - * for cp pack we can have max 1020*507 orphan entries + * for cp pack we can have max 1020*504 orphan entries */ - max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK; + max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) + * F2FS_ORPHANS_PER_BLOCK; mutex_lock(&sbi->orphan_inode_mutex); if (sbi->n_orphans >= max_orphans) err = -ENOSPC; From aac44046a2cdf70474fee8931bc8a591fd16c8dc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 Nov 2013 15:41:39 +0800 Subject: [PATCH 36/95] f2fs: correct type of wait in struct bio_private The void *wait in bio_private is used for waiting completion of checkpoint bio. So we don't need to use its type as void, but declare it as completion type. Signed-off-by: Chao Yu [Jaegeuk Kim: add description] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 07887e1cc704..7fea2ee8a5da 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -96,7 +96,7 @@ struct bio_private { struct f2fs_sb_info *sbi; bool is_sync; - void *wait; + struct completion *wait; }; /* From 01d2d1aa0648192fd1d49f7d74d7e8b85b1c585a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 Nov 2013 15:43:07 +0800 Subject: [PATCH 37/95] f2fs: use true and false for boolean variable The inode_page_locked should be a boolean variable. struct dnode_of_data { struct inode *inode; /* vfs inode pointer */ struct page *inode_page; /* its inode page, NULL is possible */ struct page *node_page; /* cached direct node page */ nid_t nid; /* node id of the direct node block */ unsigned int ofs_in_node; /* data offset in the node page */ ==> bool inode_page_locked; /* inode page is locked or not */ block_t data_blkaddr; /* block address of the node block */ }; Signed-off-by: Chao Yu [Jaegeuk Kim: add description] Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0fe9a9720b39..d0ab00334b02 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -791,7 +791,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page) set_new_dnode(&dn, inode, page, npage, nid); if (page) - dn.inode_page_locked = 1; + dn.inode_page_locked = true; truncate_node(&dn); return 0; } From 1069bbf7b963d31d5532c36d43a02b0447e5bcfa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 Nov 2013 15:43:43 +0800 Subject: [PATCH 38/95] f2fs: check return value of f2fs_readpage in find_data_page We should return error if we do not get an updated page in find_date_page when f2fs_readpage failed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 24f752de6a90..c9a76f8c1028 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -240,6 +240,9 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) err = f2fs_readpage(sbi, page, dn.data_blkaddr, sync ? READ_SYNC : READA); + if (err) + return ERR_PTR(err); + if (sync) { wait_on_page_locked(page); if (!PageUptodate(page)) { From 8f99a946f360c4083d6e323e10a5928e1ce385a4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 28 Nov 2013 15:43:43 +0800 Subject: [PATCH 39/95] f2fs: convert recover_orphan_inodes to void The recover_orphan_inodes() returns no error all the time, so we don't need to check its errors. Signed-off-by: Chao Yu [Jaegeuk Kim: add description] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++--- fs/f2fs/f2fs.h | 2 +- fs/f2fs/super.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c7e0572fdede..40eea42f85ff 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -271,12 +271,12 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) iput(inode); } -int recover_orphan_inodes(struct f2fs_sb_info *sbi) +void recover_orphan_inodes(struct f2fs_sb_info *sbi) { block_t start_blk, orphan_blkaddr, i, j; if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) - return 0; + return; sbi->por_doing = true; start_blk = __start_cp_addr(sbi) + 1; @@ -296,7 +296,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) /* clear Orphan Flag */ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); sbi->por_doing = false; - return 0; + return; } static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a76c83937b1..ca33cda78e02 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1126,7 +1126,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); void add_orphan_inode(struct f2fs_sb_info *, nid_t); void remove_orphan_inode(struct f2fs_sb_info *, nid_t); -int recover_orphan_inodes(struct f2fs_sb_info *); +void recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 43f11cae408b..dd55074a304f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -952,9 +952,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } /* if there are nt orphan nodes free them */ - err = -EINVAL; - if (recover_orphan_inodes(sbi)) - goto free_node_inode; + recover_orphan_inodes(sbi); /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); @@ -963,8 +961,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = PTR_ERR(root); goto free_node_inode; } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + err = -EINVAL; goto free_root_inode; + } sb->s_root = d_make_root(root); /* allocate root dentry */ if (!sb->s_root) { From 187b5b8b3dfcfc73126f2743c89cc47df3bf07be Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 30 Nov 2013 10:10:31 +0900 Subject: [PATCH 40/95] f2fs: remove the own bi_private allocation Previously f2fs allocates its own bi_private data structure all the time even though we don't use it. But, can we remove this bi_private allocation? This patch removes such the additional bi_private allocation. 1. Retrieve f2fs_sb_info from its page->mapping->host->i_sb. - This removes the usecases of bi_private in end_io. 2. Use bi_private only when we really need it. - The bi_private is used only when the checkpoint procedure is conducted. - When conducting the checkpoint, f2fs submits a META_FLUSH bio to wait its bio completion. - Since we have no dependancies to remove bi_private now, let's just use bi_private pointer as the completion pointer. Reviewed-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 43 ++++++++++++++++--------------------------- fs/f2fs/segment.h | 7 ------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 03878634a0fe..0db40271f0d8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -791,7 +791,7 @@ static void f2fs_end_io_write(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct bio_private *p = bio->bi_private; + struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb); do { struct page *page = bvec->bv_page; @@ -802,21 +802,21 @@ static void f2fs_end_io_write(struct bio *bio, int err) SetPageError(page); if (page->mapping) set_bit(AS_EIO, &page->mapping->flags); - set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); - p->sbi->sb->s_flags |= MS_RDONLY; + + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + sbi->sb->s_flags |= MS_RDONLY; } end_page_writeback(page); - dec_page_count(p->sbi, F2FS_WRITEBACK); + dec_page_count(sbi, F2FS_WRITEBACK); } while (bvec >= bio->bi_io_vec); - if (p->is_sync) - complete(p->wait); + if (bio->bi_private) + complete(bio->bi_private); - if (!get_pages(p->sbi, F2FS_WRITEBACK) && - !list_empty(&p->sbi->cp_wait.task_list)) - wake_up(&p->sbi->cp_wait); + if (!get_pages(sbi, F2FS_WRITEBACK) && + !list_empty(&sbi->cp_wait.task_list)) + wake_up(&sbi->cp_wait); - kfree(p); bio_put(bio); } @@ -838,7 +838,6 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, int rw = sync ? WRITE_SYNC : WRITE; enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = &sbi->write_io[btype]; - struct bio_private *p; if (!io->bio) return; @@ -851,18 +850,16 @@ static void do_submit_bio(struct f2fs_sb_info *sbi, trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio); - p = io->bio->bi_private; - p->sbi = sbi; - io->bio->bi_end_io = f2fs_end_io_write; - + /* + * META_FLUSH is only from the checkpoint procedure, and we should wait + * this metadata bio for FS consistency. + */ if (type == META_FLUSH) { DECLARE_COMPLETION_ONSTACK(wait); - p->is_sync = true; - p->wait = &wait; + io->bio->bi_private = &wait; submit_bio(rw, io->bio); wait_for_completion(&wait); } else { - p->is_sync = false; submit_bio(rw, io->bio); } io->bio = NULL; @@ -897,18 +894,10 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, do_submit_bio(sbi, type, false); alloc_new: if (io->bio == NULL) { - struct bio_private *priv; -retry: - priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); - if (!priv) { - cond_resched(); - goto retry; - } - bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); io->bio = f2fs_bio_alloc(bdev, bio_blocks); io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - io->bio->bi_private = priv; + io->bio->bi_end_io = f2fs_end_io_write; /* * The end_io will be assigned at the sumbission phase. * Until then, let bio_add_page() merge consecutive IOs as much diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7fea2ee8a5da..26812fc0fa12 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -92,13 +92,6 @@ #define MAX_BIO_BLOCKS(max_hw_blocks) \ (min((int)max_hw_blocks, BIO_MAX_PAGES)) -/* during checkpoint, bio_private is used to synchronize the last bio */ -struct bio_private { - struct f2fs_sb_info *sbi; - bool is_sync; - struct completion *wait; -}; - /* * indicate a block allocation direction: RIGHT and LEFT. * RIGHT means allocating new sections towards the end of volume. From 93dfe2ac516250755f7d5edd438b0ce67c0e3aa6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 30 Nov 2013 12:51:14 +0900 Subject: [PATCH 41/95] f2fs: refactor bio-related operations This patch integrates redundant bio operations on read and write IOs. 1. Move bio-related codes to the top of data.c. 2. Replace f2fs_submit_bio with f2fs_submit_merged_bio, which handles read bios additionally. 3. Introduce __submit_merged_bio to submit the merged bio. 4. Change f2fs_readpage to f2fs_submit_page_bio. 5. Introduce f2fs_submit_page_mbio to integrate previous submit_read_page and submit_write_page. Reviewed-by: Gu Zheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 14 +- fs/f2fs/data.c | 316 +++++++++++++++++++++++------------- fs/f2fs/f2fs.h | 12 +- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 14 +- fs/f2fs/recovery.c | 4 +- fs/f2fs/segment.c | 164 ++----------------- include/trace/events/f2fs.h | 30 ++-- 8 files changed, 257 insertions(+), 299 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 40eea42f85ff..38f4a2245085 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -61,7 +61,8 @@ repeat: if (PageUptodate(page)) goto out; - if (f2fs_readpage(sbi, page, index, READ_SYNC | REQ_META | REQ_PRIO)) + if (f2fs_submit_page_bio(sbi, page, index, + READ_SYNC | REQ_META | REQ_PRIO)) goto repeat; lock_page(page); @@ -157,7 +158,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, } if (nwritten) - f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX); + f2fs_submit_merged_bio(sbi, type, nr_to_write == LONG_MAX, + WRITE); return nwritten; } @@ -590,7 +592,7 @@ retry: * We should submit bio, since it exists several * wribacking dentry pages in the freeing inode. */ - f2fs_submit_bio(sbi, DATA, true); + f2fs_submit_merged_bio(sbi, DATA, true, WRITE); } goto retry; } @@ -796,9 +798,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); - f2fs_submit_bio(sbi, DATA, true); - f2fs_submit_bio(sbi, NODE, true); - f2fs_submit_bio(sbi, META, true); + f2fs_submit_merged_bio(sbi, DATA, true, WRITE); + f2fs_submit_merged_bio(sbi, NODE, true, WRITE); + f2fs_submit_merged_bio(sbi, META, true, WRITE); /* * update checkpoint pack index diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c9a76f8c1028..4e2fc09f0e4f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -24,6 +24,204 @@ #include "segment.h" #include +/* + * Low-level block read/write IO operations. + */ +static struct bio *__bio_alloc(struct block_device *bdev, int npages) +{ + struct bio *bio; + + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + bio->bi_bdev = bdev; + bio->bi_private = NULL; + return bio; +} + +static void f2fs_read_end_io(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + SetPageUptodate(page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + unlock_page(page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +} + +static void f2fs_write_end_io(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb); + + do { + struct page *page = bvec->bv_page; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + SetPageError(page); + set_bit(AS_EIO, &page->mapping->flags); + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + sbi->sb->s_flags |= MS_RDONLY; + } + end_page_writeback(page); + dec_page_count(sbi, F2FS_WRITEBACK); + } while (bvec >= bio->bi_io_vec); + + if (bio->bi_private) + complete(bio->bi_private); + + if (!get_pages(sbi, F2FS_WRITEBACK) && + !list_empty(&sbi->cp_wait.task_list)) + wake_up(&sbi->cp_wait); + + bio_put(bio); +} + +static void __submit_merged_bio(struct f2fs_sb_info *sbi, + struct f2fs_bio_info *io, + enum page_type type, bool sync, int rw) +{ + enum page_type btype = PAGE_TYPE_OF_BIO(type); + + if (!io->bio) + return; + + if (btype == META) + rw |= REQ_META; + + if (is_read_io(rw)) { + if (sync) + rw |= READ_SYNC; + submit_bio(rw, io->bio); + trace_f2fs_submit_read_bio(sbi->sb, rw, type, io->bio); + io->bio = NULL; + return; + } + + if (sync) + rw |= WRITE_SYNC; + if (type >= META_FLUSH) + rw |= WRITE_FLUSH_FUA; + + /* + * META_FLUSH is only from the checkpoint procedure, and we should wait + * this metadata bio for FS consistency. + */ + if (type == META_FLUSH) { + DECLARE_COMPLETION_ONSTACK(wait); + io->bio->bi_private = &wait; + submit_bio(rw, io->bio); + wait_for_completion(&wait); + } else { + submit_bio(rw, io->bio); + } + trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio); + io->bio = NULL; +} + +void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, + enum page_type type, bool sync, int rw) +{ + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io; + + io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; + + mutex_lock(&io->io_mutex); + __submit_merged_bio(sbi, io, type, sync, rw); + mutex_unlock(&io->io_mutex); +} + +/* + * Fill the locked page with data located in the block address. + * Return unlocked page. + */ +int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, int rw) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + + trace_f2fs_submit_page_bio(page, blk_addr, rw); + + /* Allocate a new bio */ + bio = __bio_alloc(bdev, 1); + + /* Initialize the bio */ + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + bio->bi_end_io = is_read_io(rw) ? f2fs_read_end_io : f2fs_write_end_io; + + if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + bio_put(bio); + f2fs_put_page(page, 1); + return -EFAULT; + } + + submit_bio(rw, bio); + return 0; +} + +void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, + block_t blk_addr, enum page_type type, int rw) +{ + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct block_device *bdev = sbi->sb->s_bdev; + struct f2fs_bio_info *io; + int bio_blocks; + + io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; + + verify_block_addr(sbi, blk_addr); + + mutex_lock(&io->io_mutex); + + if (!is_read_io(rw)) + inc_page_count(sbi, F2FS_WRITEBACK); + + if (io->bio && io->last_block_in_bio != blk_addr - 1) + __submit_merged_bio(sbi, io, type, true, rw); +alloc_new: + if (io->bio == NULL) { + bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + io->bio = __bio_alloc(bdev, bio_blocks); + io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + io->bio->bi_end_io = is_read_io(rw) ? f2fs_read_end_io : + f2fs_write_end_io; + /* + * The end_io will be assigned at the sumbission phase. + * Until then, let bio_add_page() merge consecutive IOs as much + * as possible. + */ + } + + if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < + PAGE_CACHE_SIZE) { + __submit_merged_bio(sbi, io, type, true, rw); + goto alloc_new; + } + + io->last_block_in_bio = blk_addr; + + mutex_unlock(&io->io_mutex); + trace_f2fs_submit_page_mbio(page, rw, type, blk_addr); +} + /* * Lock ordering for the change of data block address: * ->data_page @@ -238,7 +436,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return page; } - err = f2fs_readpage(sbi, page, dn.data_blkaddr, + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, sync ? READ_SYNC : READA); if (err) return ERR_PTR(err); @@ -299,7 +497,7 @@ repeat: return page; } - err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); if (err) return ERR_PTR(err); @@ -349,7 +547,8 @@ repeat: zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); if (err) return ERR_PTR(err); lock_page(page); @@ -373,110 +572,6 @@ repeat: return page; } -static void read_end_io(struct bio *bio, int err) -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - - do { - struct page *page = bvec->bv_page; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); - } while (bvec >= bio->bi_io_vec); - bio_put(bio); -} - -/* - * Fill the locked page with data located in the block address. - * Return unlocked page. - */ -int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, int type) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct bio *bio; - - trace_f2fs_readpage(page, blk_addr, type); - - /* Allocate a new bio */ - bio = f2fs_bio_alloc(bdev, 1); - - /* Initialize the bio */ - bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - bio->bi_end_io = read_end_io; - - if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { - bio_put(bio); - f2fs_put_page(page, 1); - return -EFAULT; - } - - submit_bio(type, bio); - return 0; -} - -void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, int rw) -{ - struct f2fs_bio_info *io = &sbi->read_io; - - if (!io->bio) - return; - - trace_f2fs_submit_read_bio(sbi->sb, rw, META, io->bio); - - mutex_lock(&io->io_mutex); - if (io->bio) { - submit_bio(rw, io->bio); - io->bio = NULL; - } - mutex_unlock(&io->io_mutex); -} - -void submit_read_page(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, int rw) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct f2fs_bio_info *io = &sbi->read_io; - int bio_blocks; - - verify_block_addr(sbi, blk_addr); - - mutex_lock(&io->io_mutex); - - if (io->bio && io->last_block_in_bio != blk_addr - 1) { - submit_bio(rw, io->bio); - io->bio = NULL; - } -alloc_new: - if (io->bio == NULL) { - bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - io->bio = f2fs_bio_alloc(bdev, bio_blocks); - io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - io->bio->bi_end_io = read_end_io; - } - - if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < - PAGE_CACHE_SIZE) { - submit_bio(rw, io->bio); - io->bio = NULL; - goto alloc_new; - } - - io->last_block_in_bio = blk_addr; - - mutex_unlock(&io->io_mutex); - trace_f2fs_submit_read_page(page, rw, META, blk_addr); -} - /* * This function should be used by the data read flow only where it * does not check the "create" flag that indicates block allocation. @@ -638,7 +733,7 @@ write: goto redirty_out; if (wbc->for_reclaim) - f2fs_submit_bio(sbi, DATA, true); + f2fs_submit_merged_bio(sbi, DATA, true, WRITE); clear_cold_data(page); out: @@ -690,7 +785,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); if (locked) mutex_unlock(&sbi->writepages); - f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); + f2fs_submit_merged_bio(sbi, DATA, wbc->sync_mode == WB_SYNC_ALL, WRITE); remove_dirty_dir_inode(inode); @@ -741,7 +836,8 @@ repeat: if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); if (err) return err; lock_page(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ca33cda78e02..10eca022e1e1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -364,6 +364,7 @@ enum page_type { META_FLUSH, }; +#define is_read_io(rw) (((rw) & 1) == READ) struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ @@ -1093,9 +1094,6 @@ void clear_prefree_segments(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); -struct bio *f2fs_bio_alloc(struct block_device *, int); -void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); -void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, block_t, block_t *); @@ -1106,6 +1104,7 @@ void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void rewrite_node_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); +void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); void write_data_summaries(struct f2fs_sb_info *, block_t); void write_node_summaries(struct f2fs_sb_info *, block_t); int lookup_journal_in_cursum(struct f2fs_summary_block *, @@ -1141,15 +1140,16 @@ void destroy_checkpoint_caches(void); /* * data.c */ +void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, bool, int); +int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); +void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, + enum page_type, int); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); -void f2fs_submit_read_bio(struct f2fs_sb_info *, int); -void submit_read_page(struct f2fs_sb_info *, struct page *, block_t, int); int do_write_data_page(struct page *); /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5fa54c1ca33b..2886aef35d59 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -631,7 +631,7 @@ next_iput: goto next_step; if (gc_type == FG_GC) { - f2fs_submit_bio(sbi, DATA, true); + f2fs_submit_merged_bio(sbi, DATA, true, WRITE); /* * In the case of FG_GC, it'd be better to reclaim this victim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d0ab00334b02..0e1a3df18e58 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -106,11 +106,11 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) f2fs_put_page(page, 1); continue; } - submit_read_page(sbi, page, index, READ_SYNC | REQ_META); + f2fs_submit_page_mbio(sbi, page, index, META, READ); mark_page_accessed(page); f2fs_put_page(page, 0); } - f2fs_submit_read_bio(sbi, READ_SYNC | REQ_META); + f2fs_submit_merged_bio(sbi, META, true, READ); } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) @@ -891,7 +891,7 @@ fail: * LOCKED_PAGE: f2fs_put_page(page, 1) * error: nothing */ -static int read_node_page(struct page *page, int type) +static int read_node_page(struct page *page, int rw) { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); struct node_info ni; @@ -906,7 +906,7 @@ static int read_node_page(struct page *page, int type) if (PageUptodate(page)) return LOCKED_PAGE; - return f2fs_readpage(sbi, page, ni.blk_addr, type); + return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw); } /* @@ -1136,8 +1136,8 @@ continue_unlock: } if (wrote) - f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); - + f2fs_submit_merged_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL, + WRITE); return nwritten; } @@ -1592,7 +1592,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, */ ClearPageUptodate(page); - if (f2fs_readpage(sbi, page, addr, READ_SYNC)) + if (f2fs_submit_page_bio(sbi, page, addr, READ_SYNC)) goto out; lock_page(page); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fdc81161f254..c209b8652927 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -143,7 +143,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) while (1) { struct fsync_inode_entry *entry; - err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); if (err) goto out; @@ -386,7 +386,7 @@ static int recover_data(struct f2fs_sb_info *sbi, while (1) { struct fsync_inode_entry *entry; - err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); + err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); if (err) goto out; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0db40271f0d8..ca9adf5914cc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -787,146 +787,6 @@ static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; -static void f2fs_end_io_write(struct bio *bio, int err) -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb); - - do { - struct page *page = bvec->bv_page; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - if (!uptodate) { - SetPageError(page); - if (page->mapping) - set_bit(AS_EIO, &page->mapping->flags); - - set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); - sbi->sb->s_flags |= MS_RDONLY; - } - end_page_writeback(page); - dec_page_count(sbi, F2FS_WRITEBACK); - } while (bvec >= bio->bi_io_vec); - - if (bio->bi_private) - complete(bio->bi_private); - - if (!get_pages(sbi, F2FS_WRITEBACK) && - !list_empty(&sbi->cp_wait.task_list)) - wake_up(&sbi->cp_wait); - - bio_put(bio); -} - -struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) -{ - struct bio *bio; - - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - bio->bi_bdev = bdev; - bio->bi_private = NULL; - - return bio; -} - -static void do_submit_bio(struct f2fs_sb_info *sbi, - enum page_type type, bool sync) -{ - int rw = sync ? WRITE_SYNC : WRITE; - enum page_type btype = PAGE_TYPE_OF_BIO(type); - struct f2fs_bio_info *io = &sbi->write_io[btype]; - - if (!io->bio) - return; - - if (type >= META_FLUSH) - rw = WRITE_FLUSH_FUA; - - if (btype == META) - rw |= REQ_META; - - trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio); - - /* - * META_FLUSH is only from the checkpoint procedure, and we should wait - * this metadata bio for FS consistency. - */ - if (type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->bio->bi_private = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); - } else { - submit_bio(rw, io->bio); - } - io->bio = NULL; -} - -void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) -{ - struct f2fs_bio_info *io = &sbi->write_io[PAGE_TYPE_OF_BIO(type)]; - - if (!io->bio) - return; - - mutex_lock(&io->io_mutex); - do_submit_bio(sbi, type, sync); - mutex_unlock(&io->io_mutex); -} - -static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, enum page_type type) -{ - struct block_device *bdev = sbi->sb->s_bdev; - struct f2fs_bio_info *io = &sbi->write_io[type]; - int bio_blocks; - - verify_block_addr(sbi, blk_addr); - - mutex_lock(&io->io_mutex); - - inc_page_count(sbi, F2FS_WRITEBACK); - - if (io->bio && io->last_block_in_bio != blk_addr - 1) - do_submit_bio(sbi, type, false); -alloc_new: - if (io->bio == NULL) { - bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - io->bio = f2fs_bio_alloc(bdev, bio_blocks); - io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - io->bio->bi_end_io = f2fs_end_io_write; - /* - * The end_io will be assigned at the sumbission phase. - * Until then, let bio_add_page() merge consecutive IOs as much - * as possible. - */ - } - - if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < - PAGE_CACHE_SIZE) { - do_submit_bio(sbi, type, false); - goto alloc_new; - } - - io->last_block_in_bio = blk_addr; - - mutex_unlock(&io->io_mutex); - trace_f2fs_submit_write_page(page, WRITE, type, blk_addr); -} - -void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool sync) -{ - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - if (PageWriteback(page)) { - f2fs_submit_bio(sbi, type, sync); - wait_on_page_writeback(page); - } -} - static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -1040,7 +900,7 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); /* writeout dirty page into bdev */ - submit_write_page(sbi, page, *new_blkaddr, p_type); + f2fs_submit_page_mbio(sbi, page, *new_blkaddr, p_type, WRITE); mutex_unlock(&curseg->curseg_mutex); } @@ -1048,7 +908,7 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { set_page_writeback(page); - submit_write_page(sbi, page, page->index, META); + f2fs_submit_page_mbio(sbi, page, page->index, META, WRITE); } void write_node_page(struct f2fs_sb_info *sbi, struct page *page, @@ -1078,7 +938,7 @@ void write_data_page(struct inode *inode, struct page *page, void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, block_t old_blk_addr) { - submit_write_page(sbi, page, old_blk_addr, DATA); + f2fs_submit_page_mbio(sbi, page, old_blk_addr, DATA, WRITE); } void recover_data_page(struct f2fs_sb_info *sbi, @@ -1165,8 +1025,8 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, /* rewrite node page */ set_page_writeback(page); - submit_write_page(sbi, page, new_blkaddr, NODE); - f2fs_submit_bio(sbi, NODE, true); + f2fs_submit_page_mbio(sbi, page, new_blkaddr, NODE, WRITE); + f2fs_submit_merged_bio(sbi, NODE, true, WRITE); refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); locate_dirty_segment(sbi, old_cursegno); @@ -1176,6 +1036,16 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, mutex_unlock(&curseg->curseg_mutex); } +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + if (PageWriteback(page)) { + f2fs_submit_merged_bio(sbi, type, sync, WRITE); + wait_on_page_writeback(page); + } +} + static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1723,13 +1593,13 @@ repeat: continue; } - submit_read_page(sbi, page, blk_addr, READ_SYNC | REQ_META); + f2fs_submit_page_mbio(sbi, page, blk_addr, META, READ); mark_page_accessed(page); f2fs_put_page(page, 0); } - f2fs_submit_read_bio(sbi, READ_SYNC | REQ_META); + f2fs_submit_merged_bio(sbi, META, true, READ); return blkno - start; } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 204fcc3201b1..3b9f28dfc849 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -434,7 +434,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); -TRACE_EVENT_CONDITION(f2fs_readpage, +TRACE_EVENT_CONDITION(f2fs_submit_page_bio, TP_PROTO(struct page *page, sector_t blkaddr, int type), @@ -641,18 +641,22 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, __entry->size) ); -DEFINE_EVENT(f2fs__submit_bio, f2fs_submit_write_bio, +DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), - TP_ARGS(sb, rw, type, bio) + TP_ARGS(sb, rw, type, bio), + + TP_CONDITION(bio) ); -DEFINE_EVENT(f2fs__submit_bio, f2fs_submit_read_bio, +DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), - TP_ARGS(sb, rw, type, bio) + TP_ARGS(sb, rw, type, bio), + + TP_CONDITION(bio) ); DECLARE_EVENT_CLASS(f2fs__page, @@ -701,7 +705,7 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); -DECLARE_EVENT_CLASS(f2fs_io_page, +TRACE_EVENT(f2fs_submit_page_mbio, TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), @@ -733,20 +737,6 @@ DECLARE_EVENT_CLASS(f2fs_io_page, (unsigned long long)__entry->block) ); -DEFINE_EVENT(f2fs_io_page, f2fs_submit_write_page, - - TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), - - TP_ARGS(page, rw, type, blk_addr) -); - -DEFINE_EVENT(f2fs_io_page, f2fs_submit_read_page, - - TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), - - TP_ARGS(page, rw, type, blk_addr) -); - TRACE_EVENT(f2fs_write_checkpoint, TP_PROTO(struct super_block *sb, bool is_umount, char *msg), From 9af0ff1c527ebb267d9a3b6aa8af93c5843d4390 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 22 Nov 2013 15:48:54 +0800 Subject: [PATCH 42/95] f2fs: readahead contiguous pages for restore_node_summary If cp has no CP_UMOUNT_FLAG, we will read all pages in whole node segment one by one, it makes low performance. So let's merge contiguous pages and readahead for better performance. Signed-off-by: Chao Yu [Jaegeuk Kim: adjust the new bio operations] Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 89 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0e1a3df18e58..0855168af7d7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1565,47 +1565,84 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) return 0; } +/* + * ra_sum_pages() merge contiguous pages into one bio and submit. + * these pre-readed pages are linked in pages list. + */ +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, + int start, int nrpages) +{ + struct page *page; + int page_idx = start; + + for (; page_idx < start + nrpages; page_idx++) { + /* alloc temporal page for read node summary info*/ + page = alloc_page(GFP_NOFS | __GFP_ZERO); + if (!page) { + struct page *tmp; + list_for_each_entry_safe(page, tmp, pages, lru) { + list_del(&page->lru); + unlock_page(page); + __free_pages(page, 0); + } + return -ENOMEM; + } + + lock_page(page); + page->index = page_idx; + list_add_tail(&page->lru, pages); + } + + list_for_each_entry(page, pages, lru) + f2fs_submit_page_mbio(sbi, page, page->index, META, READ); + + f2fs_submit_merged_bio(sbi, META, true, READ); + return 0; +} + int restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct page *page; + struct page *page, *tmp; block_t addr; - int i, last_offset; - - /* alloc temporal page for read node */ - page = alloc_page(GFP_NOFS | __GFP_ZERO); - if (!page) - return -ENOMEM; - lock_page(page); + int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + int i, last_offset, nrpages, err = 0; + LIST_HEAD(page_list); /* scan the node segment */ last_offset = sbi->blocks_per_seg; addr = START_BLOCK(sbi, segno); sum_entry = &sum->entries[0]; - for (i = 0; i < last_offset; i++, sum_entry++) { - /* - * In order to read next node page, - * we must clear PageUptodate flag. - */ - ClearPageUptodate(page); + for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { + nrpages = min(last_offset - i, bio_blocks); - if (f2fs_submit_page_bio(sbi, page, addr, READ_SYNC)) - goto out; + /* read ahead node pages */ + err = ra_sum_pages(sbi, &page_list, addr, nrpages); + if (err) + return err; - lock_page(page); - rn = F2FS_NODE(page); - sum_entry->nid = rn->footer.nid; - sum_entry->version = 0; - sum_entry->ofs_in_node = 0; - addr++; + list_for_each_entry_safe(page, tmp, &page_list, lru) { + + lock_page(page); + if(PageUptodate(page)) { + rn = F2FS_NODE(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + sum_entry++; + } else { + err = -EIO; + } + + list_del(&page->lru); + unlock_page(page); + __free_pages(page, 0); + } } - unlock_page(page); -out: - __free_pages(page, 0); - return 0; + return err; } static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) From c524723ebf5794083819c25320802452fe06da18 Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Tue, 3 Dec 2013 20:11:46 +0800 Subject: [PATCH 43/95] f2fs: remove debufs dir if debugfs_create_file() failed When debugfs_create_file() failed in f2fs_create_root_stats(), debugfs_root should be remove. Signed-off-by: Younger Liu Cc: Younger Liu Cc: Jaegeuk Kim Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a84b0a8e6854..b51fb3c2a8e0 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -340,14 +340,32 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) void __init f2fs_create_root_stats(void) { + struct dentry *file; + debugfs_root = debugfs_create_dir("f2fs", NULL); - if (debugfs_root) - debugfs_create_file("status", S_IRUGO, debugfs_root, - NULL, &stat_fops); + if (!debugfs_root) + goto bail; + + file = debugfs_create_file("status", S_IRUGO, debugfs_root, + NULL, &stat_fops); + if (!file) + goto free_debugfs_dir; + + return; + +free_debugfs_dir: + debugfs_remove(debugfs_root); + +bail: + debugfs_root = NULL; + return; } void f2fs_destroy_root_stats(void) { + if (!debugfs_root) + return; + debugfs_remove_recursive(debugfs_root); debugfs_root = NULL; } From 40e1ebe97d58b141332e895facf9e69b8b430ae1 Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Tue, 3 Dec 2013 21:09:29 +0800 Subject: [PATCH 44/95] f2fs: replace the debugfs_root with f2fs_debugfs_root This minor change for the naming conventions of debugfs_root to avoid any possible conflicts to the other filesystem. Signed-off-by: Younger Liu Cc: Younger Liu Cc: Jaegeuk Kim [Jaegeuk Kim: change the patch name] Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b51fb3c2a8e0..61adbcbe9b87 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -24,7 +24,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static struct dentry *debugfs_root; +static struct dentry *f2fs_debugfs_root; static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) @@ -342,11 +342,11 @@ void __init f2fs_create_root_stats(void) { struct dentry *file; - debugfs_root = debugfs_create_dir("f2fs", NULL); - if (!debugfs_root) + f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); + if (!f2fs_debugfs_root) goto bail; - file = debugfs_create_file("status", S_IRUGO, debugfs_root, + file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, &stat_fops); if (!file) goto free_debugfs_dir; @@ -354,18 +354,18 @@ void __init f2fs_create_root_stats(void) return; free_debugfs_dir: - debugfs_remove(debugfs_root); + debugfs_remove(f2fs_debugfs_root); bail: - debugfs_root = NULL; + f2fs_debugfs_root = NULL; return; } void f2fs_destroy_root_stats(void) { - if (!debugfs_root) + if (!f2fs_debugfs_root) return; - debugfs_remove_recursive(debugfs_root); - debugfs_root = NULL; + debugfs_remove_recursive(f2fs_debugfs_root); + f2fs_debugfs_root = NULL; } From a0acdfe05a954363861a65eb537573ab417cb7ed Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Dec 2013 09:54:00 +0800 Subject: [PATCH 45/95] f2fs: use inner macro GFP_F2FS_ZERO for simplification Use inner macro GFP_F2FS_ZERO to instead of GFP_NOFS | __GFP_ZERO for simplification of code. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0855168af7d7..099f06f84e29 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1577,7 +1577,7 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, for (; page_idx < start + nrpages; page_idx++) { /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_NOFS | __GFP_ZERO); + page = alloc_page(GFP_F2FS_ZERO); if (!page) { struct page *tmp; list_for_each_entry_safe(page, tmp, pages, lru) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c209b8652927..7dda1f28f6cb 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -377,7 +377,7 @@ static int recover_data(struct f2fs_sb_info *sbi, blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); /* read node page */ - page = alloc_page(GFP_NOFS | __GFP_ZERO); + page = alloc_page(GFP_F2FS_ZERO); if (!page) return -ENOMEM; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dd55074a304f..22b07c33662f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -332,7 +332,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) { struct f2fs_inode_info *fi; - fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); + fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO); if (!fi) return NULL; From b9987a277f1ec9dba203d04c3a20d967c01a1fba Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Dec 2013 09:54:56 +0800 Subject: [PATCH 46/95] f2fs: avoid unneeded page release for correct _count of page In find_fsync_dnodes() and recover_data(), our flow is like this: ->f2fs_submit_page_bio() -> f2fs_put_page() -> page_cache_release() ---- page->_count declined to zero. ->__free_pages() -> put_page_testzero() ---- page->_count will be declined again. We will get a segment fault in put_page_testzero when CONFIG_DEBUG_VM is on, or return MM with a bad page with wrong _count num. So let's just release this page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7dda1f28f6cb..d07546575879 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -145,7 +145,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); if (err) - goto out; + return err; lock_page(page); @@ -191,9 +191,10 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } + unlock_page(page); -out: __free_pages(page, 0); + return err; } @@ -388,7 +389,7 @@ static int recover_data(struct f2fs_sb_info *sbi, err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); if (err) - goto out; + return err; lock_page(page); @@ -412,8 +413,8 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } + unlock_page(page); -out: __free_pages(page, 0); if (!err) From cfb271d485d0ec31eb92b51f4fbe54bf6542e8e6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Dec 2013 17:15:22 +0800 Subject: [PATCH 47/95] f2fs: add unlikely() macro for compiler optimization As we know, some of our branch condition will rarely be true. So we could add 'unlikely' to let compiler optimize these code, by this way we could drop unneeded 'jump' assemble code to improve performance. change log: o add *unlikely* as many as possible across the whole source files at once suggested by Jaegeuk Kim. Suggested-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 26 ++++++++++++++++---------- fs/f2fs/data.c | 4 ++-- fs/f2fs/dir.c | 4 ++-- fs/f2fs/f2fs.h | 8 ++++---- fs/f2fs/node.c | 16 ++++++++-------- fs/f2fs/segment.h | 2 +- 6 files changed, 33 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 38f4a2245085..6b2106685b7a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -82,13 +82,12 @@ static int f2fs_write_meta_page(struct page *page, struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); /* Should not write any meta pages, if any IO error was occurred */ - if (wbc->for_reclaim || sbi->por_doing || - is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { - dec_page_count(sbi, F2FS_DIRTY_META); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; - } + if (unlikely(sbi->por_doing || + is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) + goto redirty_out; + + if (wbc->for_reclaim) + goto redirty_out; wait_on_page_writeback(page); @@ -96,6 +95,12 @@ static int f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); return 0; + +redirty_out: + dec_page_count(sbi, F2FS_DIRTY_META); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; } static int f2fs_write_meta_pages(struct address_space *mapping, @@ -137,7 +142,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); - if (nr_pages == 0) + if (unlikely(nr_pages == 0)) break; for (i = 0; i < nr_pages; i++) { @@ -150,7 +155,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, unlock_page(page); break; } - if (nwritten++ >= nr_to_write) + nwritten++; + if (unlikely(nwritten >= nr_to_write)) break; } pagevec_release(&pvec); @@ -200,7 +206,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; mutex_lock(&sbi->orphan_inode_mutex); - if (sbi->n_orphans >= max_orphans) + if (unlikely(sbi->n_orphans >= max_orphans)) err = -ENOSPC; else sbi->n_orphans++; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4e2fc09f0e4f..2ce5a9ef508b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -251,7 +251,7 @@ int reserve_new_block(struct dnode_of_data *dn) if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) return -EPERM; - if (!inc_valid_block_count(sbi, dn->inode, 1)) + if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); @@ -711,7 +711,7 @@ static int f2fs_write_data_page(struct page *page, zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: - if (sbi->por_doing) { + if (unlikely(sbi->por_doing)) { err = AOP_WRITEPAGE_ACTIVATE; goto redirty_out; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 594fc1bb64ef..0cc26ba07c3b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -190,7 +190,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; - if (namelen > F2FS_NAME_LEN) + if (unlikely(namelen > F2FS_NAME_LEN)) return NULL; if (npages == 0) @@ -461,7 +461,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in } start: - if (current_depth == MAX_DIR_HASH_DEPTH) + if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; /* Increase the depth, if required */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 10eca022e1e1..dca18b3bcc62 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -574,7 +574,7 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { WARN_ON((nid >= NM_I(sbi)->max_nid)); - if (nid >= NM_I(sbi)->max_nid) + if (unlikely(nid >= NM_I(sbi)->max_nid)) return -EINVAL; return 0; } @@ -600,7 +600,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + (block_t)count; - if (valid_block_count > sbi->user_block_count) { + if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); return false; } @@ -719,13 +719,13 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (valid_block_count > sbi->user_block_count) { + if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); return false; } valid_node_count = sbi->total_valid_node_count + 1; - if (valid_node_count > sbi->total_node_count) { + if (unlikely(valid_node_count > sbi->total_node_count)) { spin_unlock(&sbi->stat_lock); return false; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 099f06f84e29..2e41636be476 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -94,7 +94,7 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) int i; for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { - if (nid >= nm_i->max_nid) + if (unlikely(nid >= nm_i->max_nid)) nid = 0; index = current_nat_addr(sbi, nid); @@ -1160,7 +1160,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) struct page *page = pvec.pages[i]; /* until radix tree lookup accepts end_index */ - if (page->index > end) + if (unlikely(page->index > end)) continue; if (ino && ino_of_node(page) == ino) { @@ -1190,7 +1190,7 @@ static int f2fs_write_node_page(struct page *page, block_t new_addr; struct node_info ni; - if (sbi->por_doing) + if (unlikely(sbi->por_doing)) goto redirty_out; wait_on_page_writeback(page); @@ -1326,7 +1326,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) return -1; /* 0 nid should not be used */ - if (nid == 0) + if (unlikely(nid == 0)) return 0; if (build) { @@ -1379,7 +1379,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { - if (start_nid >= nm_i->max_nid) + if (unlikely(start_nid >= nm_i->max_nid)) break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); @@ -1413,7 +1413,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); - if (nid >= nm_i->max_nid) + if (unlikely(nid >= nm_i->max_nid)) nid = 0; if (i++ == FREE_NID_PAGES) @@ -1447,7 +1447,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; struct list_head *this; retry: - if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) + if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) return false; spin_lock(&nm_i->free_nid_list_lock); @@ -1557,7 +1557,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) new_ni = old_ni; new_ni.ino = ino; - if (!inc_valid_node_count(sbi, NULL)) + if (unlikely(!inc_valid_node_count(sbi, NULL))) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR); inc_valid_inode_count(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 26812fc0fa12..ea563760f4b7 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -457,7 +457,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - if (sbi->por_doing) + if (unlikely(sbi->por_doing)) return false; return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + From 6bacf52fb58aeb3e89d9a62970b85a5570aa8ace Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Dec 2013 15:00:58 +0900 Subject: [PATCH 48/95] f2fs: add unlikely() macro for compiler more aggressively This patch adds unlikely() macro into the most of codes. The basic rule is to add that when: - checking unusual errors, - checking page mappings, - and the other unlikely conditions. Change log from v1: - Don't add unlikely for the NULL test and error test: advised by Andi Kleen. Cc: Chao Yu Cc: Andi Kleen Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 +++++----- fs/f2fs/data.c | 29 ++++++++++++++--------------- fs/f2fs/file.c | 9 ++++----- fs/f2fs/gc.c | 3 +-- fs/f2fs/node.c | 28 ++++++++++++++-------------- fs/f2fs/recovery.c | 2 +- fs/f2fs/super.c | 14 +++++++------- fs/f2fs/xattr.c | 2 +- 8 files changed, 47 insertions(+), 50 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6b2106685b7a..cf505eb843e8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -66,7 +66,7 @@ repeat: goto repeat; lock_page(page); - if (page->mapping != mapping) { + if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } @@ -473,7 +473,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) list_for_each(this, head) { struct dir_inode_entry *entry; entry = list_entry(this, struct dir_inode_entry, list); - if (entry->inode == inode) + if (unlikely(entry->inode == inode)) return -EEXIST; } list_add_tail(&new->list, head); @@ -783,7 +783,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); - if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { clear_prefree_segments(sbi); F2FS_RESET_SB_DIRT(sbi); } @@ -840,11 +840,11 @@ int __init create_checkpoint_caches(void) { orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", sizeof(struct orphan_inode_entry), NULL); - if (unlikely(!orphan_entry_slab)) + if (!orphan_entry_slab) return -ENOMEM; inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", sizeof(struct dir_inode_entry), NULL); - if (unlikely(!inode_entry_slab)) { + if (!inode_entry_slab) { kmem_cache_destroy(orphan_entry_slab); return -ENOMEM; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2ce5a9ef508b..5607393198df 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -49,11 +49,11 @@ static void f2fs_read_end_io(struct bio *bio, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (uptodate) { - SetPageUptodate(page); - } else { + if (unlikely(!uptodate)) { ClearPageUptodate(page); SetPageError(page); + } else { + SetPageUptodate(page); } unlock_page(page); } while (bvec >= bio->bi_io_vec); @@ -73,7 +73,7 @@ static void f2fs_write_end_io(struct bio *bio, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (!uptodate) { + if (unlikely(!uptodate)) { SetPageError(page); set_bit(AS_EIO, &page->mapping->flags); set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); @@ -249,7 +249,7 @@ int reserve_new_block(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; @@ -424,7 +424,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return ERR_PTR(-ENOENT); /* By fallocate(), there is no cached page, but with NEW_ADDR */ - if (dn.data_blkaddr == NEW_ADDR) + if (unlikely(dn.data_blkaddr == NEW_ADDR)) return ERR_PTR(-EINVAL); page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); @@ -443,7 +443,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (sync) { wait_on_page_locked(page); - if (!PageUptodate(page)) { + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 0); return ERR_PTR(-EIO); } @@ -477,7 +477,7 @@ repeat: } f2fs_put_dnode(&dn); - if (dn.data_blkaddr == NULL_ADDR) { + if (unlikely(dn.data_blkaddr == NULL_ADDR)) { f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); } @@ -502,11 +502,11 @@ repeat: return ERR_PTR(err); lock_page(page); - if (!PageUptodate(page)) { + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - if (page->mapping != mapping) { + if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } @@ -534,7 +534,6 @@ struct page *get_new_data_page(struct inode *inode, err = f2fs_reserve_block(&dn, index); if (err) return ERR_PTR(err); - repeat: page = grab_cache_page(mapping, index); if (!page) @@ -552,11 +551,11 @@ repeat: if (err) return ERR_PTR(err); lock_page(page); - if (!PageUptodate(page)) { + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - if (page->mapping != mapping) { + if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } @@ -841,11 +840,11 @@ repeat: if (err) return err; lock_page(page); - if (!PageUptodate(page)) { + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return -EIO; } - if (page->mapping != mapping) { + if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2b47adcd852a..5accc964368d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -50,9 +50,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, file_update_time(vma->vm_file); lock_page(page); - if (page->mapping != inode->i_mapping || + if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || - !PageUptodate(page)) { + !PageUptodate(page))) { unlock_page(page); err = -EFAULT; goto out; @@ -120,7 +120,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) .for_reclaim = 0, }; - if (f2fs_readonly(inode->i_sb)) + if (unlikely(f2fs_readonly(inode->i_sb))) return 0; trace_f2fs_sync_file_enter(inode); @@ -241,7 +241,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) return; lock_page(page); - if (page->mapping != inode->i_mapping) { + if (unlikely(page->mapping != inode->i_mapping)) { f2fs_put_page(page, 1); return; } @@ -516,7 +516,6 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) break; - if (pg_start == pg_end) new_size = offset + len; else if (index == pg_start && off_start) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2886aef35d59..29ceb9d71c8c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -119,7 +119,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) kfree(gc_th); sbi->gc_thread = NULL; } - out: return err; } @@ -695,7 +694,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&ilist); gc_more: - if (!(sbi->sb->s_flags & MS_ACTIVE)) + if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2e41636be476..6c6ef772cf01 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -750,7 +750,7 @@ skip_partial: if (offset[1] == 0 && rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); - if (page->mapping != node_mapping) { + if (unlikely(page->mapping != node_mapping)) { f2fs_put_page(page, 1); goto restart; } @@ -841,14 +841,14 @@ struct page *new_node_page(struct dnode_of_data *dn, struct page *page; int err; - if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) + if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return ERR_PTR(-EPERM); page = grab_cache_page(mapping, dn->nid); if (!page) return ERR_PTR(-ENOMEM); - if (!inc_valid_node_count(sbi, dn->inode)) { + if (unlikely(!inc_valid_node_count(sbi, dn->inode))) { err = -ENOSPC; goto fail; } @@ -898,7 +898,7 @@ static int read_node_page(struct page *page, int rw) get_node_info(sbi, page->index, &ni); - if (ni.blk_addr == NULL_ADDR) { + if (unlikely(ni.blk_addr == NULL_ADDR)) { f2fs_put_page(page, 1); return -ENOENT; } @@ -953,11 +953,11 @@ repeat: goto got_it; lock_page(page); - if (!PageUptodate(page)) { + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - if (page->mapping != mapping) { + if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } @@ -1010,12 +1010,12 @@ repeat: blk_finish_plug(&plug); lock_page(page); - if (page->mapping != mapping) { + if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } page_hit: - if (!PageUptodate(page)) { + if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -1173,9 +1173,9 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) cond_resched(); } - if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) + if (unlikely(test_and_clear_bit(AS_ENOSPC, &mapping->flags))) ret2 = -ENOSPC; - if (test_and_clear_bit(AS_EIO, &mapping->flags)) + if (unlikely(test_and_clear_bit(AS_EIO, &mapping->flags))) ret2 = -EIO; if (!ret) ret = ret2; @@ -1202,7 +1202,7 @@ static int f2fs_write_node_page(struct page *page, get_node_info(sbi, nid, &ni); /* This page is already truncated */ - if (ni.blk_addr == NULL_ADDR) { + if (unlikely(ni.blk_addr == NULL_ADDR)) { dec_page_count(sbi, F2FS_DIRTY_NODES); unlock_page(page); return 0; @@ -1627,14 +1627,14 @@ int restore_node_summary(struct f2fs_sb_info *sbi, list_for_each_entry_safe(page, tmp, &page_list, lru) { lock_page(page); - if(PageUptodate(page)) { + if (unlikely(!PageUptodate(page))) { + err = -EIO; + } else { rn = F2FS_NODE(page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; sum_entry++; - } else { - err = -EIO; } list_del(&page->lru); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d07546575879..a3f4542160cf 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -430,7 +430,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry), NULL); - if (unlikely(!fsync_entry_slab)) + if (!fsync_entry_slab) return -ENOMEM; INIT_LIST_HEAD(&inode_list); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 22b07c33662f..68df44afd14b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -626,7 +626,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; - if (ino < F2FS_ROOT_INO(sbi)) + if (unlikely(ino < F2FS_ROOT_INO(sbi))) return ERR_PTR(-ESTALE); /* @@ -637,7 +637,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, inode = f2fs_iget(sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { + if (unlikely(generation && inode->i_generation != generation)) { /* we didn't find the right inode.. */ iput(inode); return ERR_PTR(-ESTALE); @@ -740,10 +740,10 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); fsmeta += le32_to_cpu(raw_super->segment_count_ssa); - if (fsmeta >= total) + if (unlikely(fsmeta >= total)) return 1; - if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { + if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; } @@ -808,7 +808,7 @@ retry: brelse(*raw_super_buf); f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " "in %dth superblock", block + 1); - if(block == 0) { + if (block == 0) { block++; goto retry; } else { @@ -834,7 +834,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; /* set a block size */ - if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { + if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); goto free_sbi; } @@ -1066,7 +1066,7 @@ static int __init init_inodecache(void) { f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", sizeof(struct f2fs_inode_info), NULL); - if (f2fs_inode_cachep == NULL) + if (!f2fs_inode_cachep) return -ENOMEM; return 0; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index aa7a3f139fe5..b0fb8a27f3da 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -522,7 +522,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, if (found) free = free + ENTRY_SIZE(here); - if (free < newsize) { + if (unlikely(free < newsize)) { error = -ENOSPC; goto exit; } From 63a0b7cb33d85aeb0df39b984c08e234db4925d1 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Mon, 9 Dec 2013 16:09:00 +0800 Subject: [PATCH 49/95] f2fs: merge pages with the same sync_mode flag Previously f2fs submits most of write requests using WRITE_SYNC, but f2fs_write_data_pages submits last write requests by sync_mode flags callers pass. This causes a performance problem since continuous pages with different sync flags can't be merged in cfq IO scheduler(thanks yu chao for pointing it out), and synchronous requests often take more time. This patch makes the following modifies to DATA writebacks: 1. every page will be written back using the sync mode caller pass. 2. only pages with the same sync mode can be merged in one bio request. These changes are restricted to DATA pages.Other types of writebacks are modified To remain synchronous. In my test with tiotest, f2fs sequence write performance is improved by about 7%-10% , and this patch has no obvious impact on other performance tests. Signed-off-by: Fan Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 18 ++++++++++-------- fs/f2fs/f2fs.h | 8 +++++--- fs/f2fs/gc.c | 6 +++++- fs/f2fs/segment.c | 27 +++++++++++++++++---------- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5607393198df..fb5e5c2627e5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -194,8 +194,9 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, if (!is_read_io(rw)) inc_page_count(sbi, F2FS_WRITEBACK); - if (io->bio && io->last_block_in_bio != blk_addr - 1) - __submit_merged_bio(sbi, io, type, true, rw); + if (io->bio && (io->last_block_in_bio != blk_addr - 1 || + io->rw_flag != rw)) + __submit_merged_bio(sbi, io, type, false, io->rw_flag); alloc_new: if (io->bio == NULL) { bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); @@ -203,6 +204,7 @@ alloc_new: io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); io->bio->bi_end_io = is_read_io(rw) ? f2fs_read_end_io : f2fs_write_end_io; + io->rw_flag = rw; /* * The end_io will be assigned at the sumbission phase. * Until then, let bio_add_page() merge consecutive IOs as much @@ -212,7 +214,7 @@ alloc_new: if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { - __submit_merged_bio(sbi, io, type, true, rw); + __submit_merged_bio(sbi, io, type, false, rw); goto alloc_new; } @@ -641,7 +643,7 @@ static int f2fs_read_data_pages(struct file *file, return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); } -int do_write_data_page(struct page *page) +int do_write_data_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; block_t old_blk_addr, new_blk_addr; @@ -669,10 +671,10 @@ int do_write_data_page(struct page *page) !is_cold_data(page) && need_inplace_update(inode))) { rewrite_data_page(F2FS_SB(inode->i_sb), page, - old_blk_addr); + old_blk_addr, wbc); } else { write_data_page(inode, page, &dn, - old_blk_addr, &new_blk_addr); + old_blk_addr, &new_blk_addr, wbc); update_extent_cache(new_blk_addr, &dn); } out_writepage: @@ -719,10 +721,10 @@ write: if (S_ISDIR(inode->i_mode)) { dec_page_count(sbi, F2FS_DIRTY_DENTS); inode_dec_dirty_dents(inode); - err = do_write_data_page(page); + err = do_write_data_page(page, wbc); } else { f2fs_lock_op(sbi); - err = do_write_data_page(page); + err = do_write_data_page(page, wbc); f2fs_unlock_op(sbi); need_balance_fs = true; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dca18b3bcc62..b1df239ba421 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -368,6 +368,7 @@ enum page_type { struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ + int rw_flag; /* rw flag for all pages */ struct mutex io_mutex; /* mutex for bio */ }; @@ -1098,8 +1099,9 @@ void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, block_t, block_t *); void write_data_page(struct inode *, struct page *, struct dnode_of_data*, - block_t, block_t *); -void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); + block_t, block_t *, struct writeback_control *); +void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t, + struct writeback_control *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void rewrite_node_page(struct f2fs_sb_info *, struct page *, @@ -1150,7 +1152,7 @@ void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int do_write_data_page(struct page *); +int do_write_data_page(struct page *, struct writeback_control *); /* * gc.c diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 29ceb9d71c8c..c68fba5ffadb 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -520,6 +520,10 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static void move_data_page(struct inode *inode, struct page *page, int gc_type) { + struct writeback_control wbc = { + .sync_mode = 1, + }; + if (gc_type == BG_GC) { if (PageWriteback(page)) goto out; @@ -536,7 +540,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) inode_dec_dirty_dents(inode); } set_cold_data(page); - do_write_data_page(page); + do_write_data_page(page, &wbc); clear_cold_data(page); } out: diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ca9adf5914cc..e5dc41114867 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -856,12 +856,13 @@ static int __get_segment_type(struct page *page, enum page_type p_type) static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, enum page_type p_type) + struct f2fs_summary *sum, enum page_type p_type, + struct writeback_control *wbc) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int old_cursegno; - int type; + int type, rw = WRITE; type = __get_segment_type(page, p_type); curseg = CURSEG_I(sbi, type); @@ -900,7 +901,9 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(sbi, page, *new_blkaddr, p_type, WRITE); + if (wbc->sync_mode == WB_SYNC_ALL) + rw |= WRITE_SYNC; + f2fs_submit_page_mbio(sbi, page, *new_blkaddr, p_type, rw); mutex_unlock(&curseg->curseg_mutex); } @@ -915,13 +918,16 @@ void write_node_page(struct f2fs_sb_info *sbi, struct page *page, unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) { struct f2fs_summary sum; + struct writeback_control wbc = { + .sync_mode = 1, + }; set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); + do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE, &wbc); } void write_data_page(struct inode *inode, struct page *page, struct dnode_of_data *dn, block_t old_blkaddr, - block_t *new_blkaddr) + block_t *new_blkaddr, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_summary sum; @@ -932,13 +938,14 @@ void write_data_page(struct inode *inode, struct page *page, set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(sbi, page, old_blkaddr, - new_blkaddr, &sum, DATA); + new_blkaddr, &sum, DATA, wbc); } void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blk_addr) + block_t old_blk_addr, struct writeback_control *wbc) { - f2fs_submit_page_mbio(sbi, page, old_blk_addr, DATA, WRITE); + int rw = wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE; + f2fs_submit_page_mbio(sbi, page, old_blk_addr, DATA, rw); } void recover_data_page(struct f2fs_sb_info *sbi, @@ -1025,7 +1032,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, /* rewrite node page */ set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, new_blkaddr, NODE, WRITE); + f2fs_submit_page_mbio(sbi, page, new_blkaddr, NODE, WRITE_SYNC); f2fs_submit_merged_bio(sbi, NODE, true, WRITE); refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); @@ -1593,7 +1600,7 @@ repeat: continue; } - f2fs_submit_page_mbio(sbi, page, blk_addr, META, READ); + f2fs_submit_page_mbio(sbi, page, blk_addr, META, READ_SYNC); mark_page_accessed(page); f2fs_put_page(page, 0); From 458e6197c37de53f7be0a837644daabb900c3036 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Dec 2013 13:54:01 +0900 Subject: [PATCH 50/95] f2fs: refactor bio->rw handling This patch introduces f2fs_io_info to mitigate the complex parameter list. struct f2fs_io_info { enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int rw; /* contains R/RS/W/WS */ int rw_flag; /* contains REQ_META/REQ_PRIO */ } 1. f2fs_write_data_pages - DATA - WRITE_SYNC is set when wbc->WB_SYNC_ALL. 2. sync_node_pages - NODE - WRITE_SYNC all the time 3. sync_meta_pages - META - WRITE_SYNC all the time - REQ_META | REQ_PRIO all the time ** f2fs_submit_merged_bio() handles META_FLUSH. 4. ra_nat_pages, ra_sit_pages, ra_sum_pages - META - READ_SYNC Cc: Fan Li Cc: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 11 +++--- fs/f2fs/data.c | 85 ++++++++++++++++++++++---------------------- fs/f2fs/f2fs.h | 22 +++++++----- fs/f2fs/gc.c | 10 +++--- fs/f2fs/node.c | 22 ++++++++---- fs/f2fs/segment.c | 70 +++++++++++++++++++++--------------- fs/f2fs/super.c | 7 +++- 7 files changed, 132 insertions(+), 95 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cf505eb843e8..f8c074961e0a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -164,8 +164,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, } if (nwritten) - f2fs_submit_merged_bio(sbi, type, nr_to_write == LONG_MAX, - WRITE); + f2fs_submit_merged_bio(sbi, type, WRITE); return nwritten; } @@ -598,7 +597,7 @@ retry: * We should submit bio, since it exists several * wribacking dentry pages in the freeing inode. */ - f2fs_submit_merged_bio(sbi, DATA, true, WRITE); + f2fs_submit_merged_bio(sbi, DATA, WRITE); } goto retry; } @@ -804,9 +803,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); - f2fs_submit_merged_bio(sbi, DATA, true, WRITE); - f2fs_submit_merged_bio(sbi, NODE, true, WRITE); - f2fs_submit_merged_bio(sbi, META, true, WRITE); + f2fs_submit_merged_bio(sbi, DATA, WRITE); + f2fs_submit_merged_bio(sbi, NODE, WRITE); + f2fs_submit_merged_bio(sbi, META, WRITE); /* * update checkpoint pack index diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index fb5e5c2627e5..ebc91778e815 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -93,37 +93,28 @@ static void f2fs_write_end_io(struct bio *bio, int err) bio_put(bio); } -static void __submit_merged_bio(struct f2fs_sb_info *sbi, - struct f2fs_bio_info *io, - enum page_type type, bool sync, int rw) +static void __submit_merged_bio(struct f2fs_bio_info *io) { - enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_io_info *fio = &io->fio; + int rw; if (!io->bio) return; - if (btype == META) - rw |= REQ_META; + rw = fio->rw | fio->rw_flag; if (is_read_io(rw)) { - if (sync) - rw |= READ_SYNC; submit_bio(rw, io->bio); - trace_f2fs_submit_read_bio(sbi->sb, rw, type, io->bio); + trace_f2fs_submit_read_bio(io->sbi->sb, rw, fio->type, io->bio); io->bio = NULL; return; } - if (sync) - rw |= WRITE_SYNC; - if (type >= META_FLUSH) - rw |= WRITE_FLUSH_FUA; - /* * META_FLUSH is only from the checkpoint procedure, and we should wait * this metadata bio for FS consistency. */ - if (type == META_FLUSH) { + if (fio->type == META_FLUSH) { DECLARE_COMPLETION_ONSTACK(wait); io->bio->bi_private = &wait; submit_bio(rw, io->bio); @@ -131,12 +122,12 @@ static void __submit_merged_bio(struct f2fs_sb_info *sbi, } else { submit_bio(rw, io->bio); } - trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio); + trace_f2fs_submit_write_bio(io->sbi->sb, rw, fio->type, io->bio); io->bio = NULL; } void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, - enum page_type type, bool sync, int rw) + enum page_type type, int rw) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io; @@ -144,7 +135,13 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; mutex_lock(&io->io_mutex); - __submit_merged_bio(sbi, io, type, sync, rw); + + /* change META to META_FLUSH in the checkpoint procedure */ + if (type >= META_FLUSH) { + io->fio.type = META_FLUSH; + io->fio.rw = WRITE_FLUSH_FUA; + } + __submit_merged_bio(io); mutex_unlock(&io->io_mutex); } @@ -178,33 +175,33 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, } void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, enum page_type type, int rw) + block_t blk_addr, struct f2fs_io_info *fio) { - enum page_type btype = PAGE_TYPE_OF_BIO(type); + enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct block_device *bdev = sbi->sb->s_bdev; struct f2fs_bio_info *io; int bio_blocks; - io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; + io = is_read_io(fio->rw) ? &sbi->read_io : &sbi->write_io[btype]; verify_block_addr(sbi, blk_addr); mutex_lock(&io->io_mutex); - if (!is_read_io(rw)) + if (!is_read_io(fio->rw)) inc_page_count(sbi, F2FS_WRITEBACK); if (io->bio && (io->last_block_in_bio != blk_addr - 1 || - io->rw_flag != rw)) - __submit_merged_bio(sbi, io, type, false, io->rw_flag); + io->fio.rw != fio->rw)) + __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); io->bio = __bio_alloc(bdev, bio_blocks); io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - io->bio->bi_end_io = is_read_io(rw) ? f2fs_read_end_io : + io->bio->bi_end_io = is_read_io(fio->rw) ? f2fs_read_end_io : f2fs_write_end_io; - io->rw_flag = rw; + io->fio = *fio; /* * The end_io will be assigned at the sumbission phase. * Until then, let bio_add_page() merge consecutive IOs as much @@ -214,14 +211,14 @@ alloc_new: if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { - __submit_merged_bio(sbi, io, type, false, rw); + __submit_merged_bio(io); goto alloc_new; } io->last_block_in_bio = blk_addr; mutex_unlock(&io->io_mutex); - trace_f2fs_submit_page_mbio(page, rw, type, blk_addr); + trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); } /* @@ -643,10 +640,10 @@ static int f2fs_read_data_pages(struct file *file, return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); } -int do_write_data_page(struct page *page, struct writeback_control *wbc) +int do_write_data_page(struct page *page, struct f2fs_io_info *fio) { struct inode *inode = page->mapping->host; - block_t old_blk_addr, new_blk_addr; + block_t old_blkaddr, new_blkaddr; struct dnode_of_data dn; int err = 0; @@ -655,10 +652,10 @@ int do_write_data_page(struct page *page, struct writeback_control *wbc) if (err) return err; - old_blk_addr = dn.data_blkaddr; + old_blkaddr = dn.data_blkaddr; /* This page is already truncated */ - if (old_blk_addr == NULL_ADDR) + if (old_blkaddr == NULL_ADDR) goto out_writepage; set_page_writeback(page); @@ -667,15 +664,13 @@ int do_write_data_page(struct page *page, struct writeback_control *wbc) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(old_blk_addr != NEW_ADDR && + if (unlikely(old_blkaddr != NEW_ADDR && !is_cold_data(page) && need_inplace_update(inode))) { - rewrite_data_page(F2FS_SB(inode->i_sb), page, - old_blk_addr, wbc); + rewrite_data_page(page, old_blkaddr, fio); } else { - write_data_page(inode, page, &dn, - old_blk_addr, &new_blk_addr, wbc); - update_extent_cache(new_blk_addr, &dn); + write_data_page(page, &dn, &new_blkaddr, fio); + update_extent_cache(new_blkaddr, &dn); } out_writepage: f2fs_put_dnode(&dn); @@ -693,6 +688,11 @@ static int f2fs_write_data_page(struct page *page, unsigned offset; bool need_balance_fs = false; int err = 0; + struct f2fs_io_info fio = { + .type = DATA, + .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC: WRITE, + .rw_flag = 0, + }; if (page->index < end_index) goto write; @@ -721,10 +721,10 @@ write: if (S_ISDIR(inode->i_mode)) { dec_page_count(sbi, F2FS_DIRTY_DENTS); inode_dec_dirty_dents(inode); - err = do_write_data_page(page, wbc); + err = do_write_data_page(page, &fio); } else { f2fs_lock_op(sbi); - err = do_write_data_page(page, wbc); + err = do_write_data_page(page, &fio); f2fs_unlock_op(sbi); need_balance_fs = true; } @@ -734,7 +734,7 @@ write: goto redirty_out; if (wbc->for_reclaim) - f2fs_submit_merged_bio(sbi, DATA, true, WRITE); + f2fs_submit_merged_bio(sbi, DATA, WRITE); clear_cold_data(page); out: @@ -786,7 +786,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); if (locked) mutex_unlock(&sbi->writepages); - f2fs_submit_merged_bio(sbi, DATA, wbc->sync_mode == WB_SYNC_ALL, WRITE); + + f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_dir_inode(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b1df239ba421..022ce324b166 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -364,11 +364,18 @@ enum page_type { META_FLUSH, }; +struct f2fs_io_info { + enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ + int rw; /* contains R/RS/W/WS */ + int rw_flag; /* contains REQ_META/REQ_PRIO */ +}; + #define is_read_io(rw) (((rw) & 1) == READ) struct f2fs_bio_info { + struct f2fs_sb_info *sbi; /* f2fs superblock */ struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ - int rw_flag; /* rw flag for all pages */ + struct f2fs_io_info fio; /* store buffered io info. */ struct mutex io_mutex; /* mutex for bio */ }; @@ -1098,10 +1105,9 @@ struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, block_t, block_t *); -void write_data_page(struct inode *, struct page *, struct dnode_of_data*, - block_t, block_t *, struct writeback_control *); -void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t, - struct writeback_control *); +void write_data_page(struct page *, struct dnode_of_data *, block_t *, + struct f2fs_io_info *); +void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void rewrite_node_page(struct f2fs_sb_info *, struct page *, @@ -1142,17 +1148,17 @@ void destroy_checkpoint_caches(void); /* * data.c */ -void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, bool, int); +void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, - enum page_type, int); + struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int do_write_data_page(struct page *, struct writeback_control *); +int do_write_data_page(struct page *, struct f2fs_io_info *); /* * gc.c diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c68fba5ffadb..69c18e399014 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -520,8 +520,10 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static void move_data_page(struct inode *inode, struct page *page, int gc_type) { - struct writeback_control wbc = { - .sync_mode = 1, + struct f2fs_io_info fio = { + .type = DATA, + .rw = WRITE_SYNC, + .rw_flag = 0, }; if (gc_type == BG_GC) { @@ -540,7 +542,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) inode_dec_dirty_dents(inode); } set_cold_data(page); - do_write_data_page(page, &wbc); + do_write_data_page(page, &fio); clear_cold_data(page); } out: @@ -634,7 +636,7 @@ next_iput: goto next_step; if (gc_type == FG_GC) { - f2fs_submit_merged_bio(sbi, DATA, true, WRITE); + f2fs_submit_merged_bio(sbi, DATA, WRITE); /* * In the case of FG_GC, it'd be better to reclaim this victim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6c6ef772cf01..3565caf97005 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -92,6 +92,12 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) struct page *page; pgoff_t index; int i; + struct f2fs_io_info fio = { + .type = META, + .rw = READ_SYNC, + .rw_flag = REQ_META | REQ_PRIO + }; + for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { if (unlikely(nid >= nm_i->max_nid)) @@ -106,11 +112,11 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) f2fs_put_page(page, 1); continue; } - f2fs_submit_page_mbio(sbi, page, index, META, READ); + f2fs_submit_page_mbio(sbi, page, index, &fio); mark_page_accessed(page); f2fs_put_page(page, 0); } - f2fs_submit_merged_bio(sbi, META, true, READ); + f2fs_submit_merged_bio(sbi, META, READ); } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) @@ -1136,8 +1142,7 @@ continue_unlock: } if (wrote) - f2fs_submit_merged_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL, - WRITE); + f2fs_submit_merged_bio(sbi, NODE, WRITE); return nwritten; } @@ -1574,6 +1579,11 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, { struct page *page; int page_idx = start; + struct f2fs_io_info fio = { + .type = META, + .rw = READ_SYNC, + .rw_flag = REQ_META | REQ_PRIO + }; for (; page_idx < start + nrpages; page_idx++) { /* alloc temporal page for read node summary info*/ @@ -1594,9 +1604,9 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, } list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page->index, META, READ); + f2fs_submit_page_mbio(sbi, page, page->index, &fio); - f2fs_submit_merged_bio(sbi, META, true, READ); + f2fs_submit_merged_bio(sbi, META, READ); return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e5dc41114867..0b2e8ceec983 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -856,15 +856,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type) static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, enum page_type p_type, - struct writeback_control *wbc) + struct f2fs_summary *sum, struct f2fs_io_info *fio) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int old_cursegno; - int type, rw = WRITE; + int type; - type = __get_segment_type(page, p_type); + type = __get_segment_type(page, fio->type); curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); @@ -897,55 +896,60 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); mutex_unlock(&sit_i->sentry_lock); - if (p_type == NODE) + if (fio->type == NODE) fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); /* writeout dirty page into bdev */ - if (wbc->sync_mode == WB_SYNC_ALL) - rw |= WRITE_SYNC; - f2fs_submit_page_mbio(sbi, page, *new_blkaddr, p_type, rw); + f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); mutex_unlock(&curseg->curseg_mutex); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { + struct f2fs_io_info fio = { + .type = META, + .rw = WRITE_SYNC, + .rw_flag = REQ_META | REQ_PRIO + }; + set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, page->index, META, WRITE); + f2fs_submit_page_mbio(sbi, page, page->index, &fio); } void write_node_page(struct f2fs_sb_info *sbi, struct page *page, unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) { struct f2fs_summary sum; - struct writeback_control wbc = { - .sync_mode = 1, + struct f2fs_io_info fio = { + .type = NODE, + .rw = WRITE_SYNC, + .rw_flag = 0 }; + set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE, &wbc); + do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, &fio); } -void write_data_page(struct inode *inode, struct page *page, - struct dnode_of_data *dn, block_t old_blkaddr, - block_t *new_blkaddr, struct writeback_control *wbc) +void write_data_page(struct page *page, struct dnode_of_data *dn, + block_t *new_blkaddr, struct f2fs_io_info *fio) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct f2fs_summary sum; struct node_info ni; - f2fs_bug_on(old_blkaddr == NULL_ADDR); + f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - do_write_page(sbi, page, old_blkaddr, - new_blkaddr, &sum, DATA, wbc); + do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); } -void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blk_addr, struct writeback_control *wbc) +void rewrite_data_page(struct page *page, block_t old_blkaddr, struct f2fs_io_info *fio) { - int rw = wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE; - f2fs_submit_page_mbio(sbi, page, old_blk_addr, DATA, rw); + struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio); } void recover_data_page(struct f2fs_sb_info *sbi, @@ -1004,6 +1008,11 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, unsigned int segno, old_cursegno; block_t next_blkaddr = next_blkaddr_of_node(page); unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); + struct f2fs_io_info fio = { + .type = NODE, + .rw = WRITE_SYNC, + .rw_flag = 0 + }; curseg = CURSEG_I(sbi, type); @@ -1032,8 +1041,8 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, /* rewrite node page */ set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, new_blkaddr, NODE, WRITE_SYNC); - f2fs_submit_merged_bio(sbi, NODE, true, WRITE); + f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); + f2fs_submit_merged_bio(sbi, NODE, WRITE); refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); locate_dirty_segment(sbi, old_cursegno); @@ -1048,7 +1057,7 @@ void f2fs_wait_on_page_writeback(struct page *page, { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); if (PageWriteback(page)) { - f2fs_submit_merged_bio(sbi, type, sync, WRITE); + f2fs_submit_merged_bio(sbi, type, WRITE); wait_on_page_writeback(page); } } @@ -1580,6 +1589,11 @@ static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages) block_t blk_addr, prev_blk_addr = 0; int sit_blk_cnt = SIT_BLK_CNT(sbi); int blkno = start; + struct f2fs_io_info fio = { + .type = META, + .rw = READ_SYNC, + .rw_flag = REQ_META | REQ_PRIO + }; for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) { @@ -1600,13 +1614,13 @@ repeat: continue; } - f2fs_submit_page_mbio(sbi, page, blk_addr, META, READ_SYNC); + f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); mark_page_accessed(page); f2fs_put_page(page, 0); } - f2fs_submit_merged_bio(sbi, META, true, READ); + f2fs_submit_merged_bio(sbi, META, READ); return blkno - start; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 68df44afd14b..1674f2f9970d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -885,8 +885,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) spin_lock_init(&sbi->stat_lock); mutex_init(&sbi->read_io.io_mutex); - for (i = 0; i < NR_PAGE_TYPE; i++) + sbi->read_io.sbi = sbi; + sbi->read_io.bio = NULL; + for (i = 0; i < NR_PAGE_TYPE; i++) { mutex_init(&sbi->write_io[i].io_mutex); + sbi->write_io[i].sbi = sbi; + sbi->write_io[i].bio = NULL; + } init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); From 76130ccabcc39f0716691113f739f28a3088e253 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Dec 2013 14:29:39 +0900 Subject: [PATCH 51/95] f2fs: fix the location of tracepoint We need to get a trace before submit_bio, since its bi_sector is remapped during the submit_bio. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ebc91778e815..15956fa584de 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -104,11 +104,12 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) rw = fio->rw | fio->rw_flag; if (is_read_io(rw)) { - submit_bio(rw, io->bio); trace_f2fs_submit_read_bio(io->sbi->sb, rw, fio->type, io->bio); + submit_bio(rw, io->bio); io->bio = NULL; return; } + trace_f2fs_submit_write_bio(io->sbi->sb, rw, fio->type, io->bio); /* * META_FLUSH is only from the checkpoint procedure, and we should wait @@ -122,7 +123,6 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) } else { submit_bio(rw, io->bio); } - trace_f2fs_submit_write_bio(io->sbi->sb, rw, fio->type, io->bio); io->bio = NULL; } From 5dcd8a71505186a124640f9f7c5c81fb269cc476 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Wed, 11 Dec 2013 14:32:13 +0900 Subject: [PATCH 52/95] f2fs: missing kmem_cache_destroy for discard_entry insmod f2fs.ko is failed after insmod and rmmod firstly. $ sudo insmod fs/f2fs/f2fs.ko insmod: error inserting 'fs/f2fs/f2fs.ko': -1 Cannot allocate memory -- dmesg -- kmem_cache_sanity_check (free_nid): Cache name already exists. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1674f2f9970d..5c574fa1143c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1140,6 +1140,7 @@ static void __exit exit_f2fs_fs(void) unregister_filesystem(&f2fs_fs_type); destroy_checkpoint_caches(); destroy_gc_caches(); + destroy_segment_manager_caches(); destroy_node_manager_caches(); destroy_inodecache(); kset_unregister(f2fs_kset); From 216fbd64437452d23db54ae845916facd7215caa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Nov 2013 13:13:42 +0900 Subject: [PATCH 53/95] f2fs: introduce sysfs entry to control in-place-update policy This patch introduces new sysfs entries for users to control the policy of in-place-updates, namely IPU, in f2fs. Sometimes f2fs suffers from performance degradation due to its out-of-place update policy that produces many additional node block writes. If the storage performance is very dependant on the amount of data writes instead of IO patterns, we'd better drop this out-of-place update policy. This patch suggests 5 polcies and their triggering conditions as follows. [sysfs entry name = ipu_policy] 0: F2FS_IPU_FORCE all the time, 1: F2FS_IPU_SSR if SSR mode is activated, 2: F2FS_IPU_UTIL if FS utilization is over threashold, 3: F2FS_IPU_SSR_UTIL if SSR mode is activated and FS utilization is over threashold, 4: F2FS_IPU_DISABLE disable IPU. (=default option) [sysfs entry name = min_ipu_util] This parameter controls the threshold to trigger in-place-updates. The number indicates percentage of the filesystem utilization, and used by F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. For more details, see need_inplace_update() in segment.h. Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 11 ++++++++ fs/f2fs/f2fs.h | 3 ++ fs/f2fs/segment.c | 2 ++ fs/f2fs/segment.h | 44 ++++++++++++++++++++++++++---- fs/f2fs/super.c | 4 +++ 5 files changed, 58 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index a3fe811bbdbc..1a94ac7fee86 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -171,6 +171,17 @@ Files in /sys/fs/f2fs/ conduct checkpoint to reclaim the prefree segments to free segments. By default, 100 segments, 200MB. + ipu_policy This parameter controls the policy of in-place + updates in f2fs. There are five policies: + 0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR, + 2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL, + 4: F2FS_IPU_DISABLE. + + min_ipu_util This parameter controls the threshold to trigger + in-place-updates. The number indicates percentage + of the filesystem utilization, and used by + F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. + ================================================================================ USAGE ================================================================================ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 022ce324b166..1b05a628670a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -325,6 +325,9 @@ struct f2fs_sm_info { struct list_head discard_list; /* 4KB discard list */ int nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ + + unsigned int ipu_policy; /* in-place-update policy */ + unsigned int min_ipu_util; /* in-place-update threshold */ }; /* diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0b2e8ceec983..5b890ce74b15 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1799,6 +1799,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; + sm_info->ipu_policy = F2FS_IPU_DISABLE; + sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; INIT_LIST_HEAD(&sm_info->discard_list); sm_info->nr_discards = 0; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ea563760f4b7..e9a10bdd7fed 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -476,19 +476,51 @@ static inline int utilization(struct f2fs_sb_info *sbi) /* * Sometimes f2fs may be better to drop out-of-place update policy. - * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write - * data in the original place likewise other traditional file systems. - * But, currently set 100 in percentage, which means it is disabled. - * See below need_inplace_update(). + * And, users can control the policy through sysfs entries. + * There are five policies with triggering conditions as follows. + * F2FS_IPU_FORCE - all the time, + * F2FS_IPU_SSR - if SSR mode is activated, + * F2FS_IPU_UTIL - if FS utilization is over threashold, + * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over + * threashold, + * F2FS_IPUT_DISABLE - disable IPU. (=default option) */ -#define MIN_IPU_UTIL 100 +#define DEF_MIN_IPU_UTIL 70 + +enum { + F2FS_IPU_FORCE, + F2FS_IPU_SSR, + F2FS_IPU_UTIL, + F2FS_IPU_SSR_UTIL, + F2FS_IPU_DISABLE, +}; + static inline bool need_inplace_update(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + + /* IPU can be done only for the user data */ if (S_ISDIR(inode->i_mode)) return false; - if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + + switch (SM_I(sbi)->ipu_policy) { + case F2FS_IPU_FORCE: return true; + case F2FS_IPU_SSR: + if (need_SSR(sbi)) + return true; + break; + case F2FS_IPU_UTIL: + if (utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + break; + case F2FS_IPU_SSR_UTIL: + if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + break; + case F2FS_IPU_DISABLE: + break; + } return false; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5c574fa1143c..f16da92a7899 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -178,6 +178,8 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -187,6 +189,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), + ATTR_LIST(ipu_policy), + ATTR_LIST(min_ipu_util), NULL, }; From bfad7c2d40332be6a1d7a89660bceb0f6ea1d73a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 16 Dec 2013 19:04:05 +0900 Subject: [PATCH 54/95] f2fs: introduce a new direct_IO write path Previously, f2fs doesn't support direct IOs with high performance, which throws every write requests via the buffered write path, resulting in highly performance degradation due to memory opeations like copy_from_user. This patch introduces a new direct IO path in which every write requests are processed by generic blockdev_direct_IO() with enhanced get_block function. The get_data_block() in f2fs handles: 1. if original data blocks are allocates, then give them to blockdev. 2. otherwise, a. preallocate requested block addresses b. do not use extent cache for better performance c. give the block addresses to blockdev This policy induces that: - new allocated data are sequentially written to the disk - updated data are randomly written to the disk. - f2fs gives consistency on its file meta, not file data. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 156 +++++++++++++++++++++++++++++++++------------- fs/f2fs/f2fs.h | 2 + fs/f2fs/segment.c | 23 ++++--- 3 files changed, 131 insertions(+), 50 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 15956fa584de..a0950bcbf568 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -570,74 +570,151 @@ repeat: return page; } +static int __allocate_data_block(struct dnode_of_data *dn) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); + struct f2fs_summary sum; + block_t new_blkaddr; + struct node_info ni; + int type; + + if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + return -EPERM; + if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) + return -ENOSPC; + + __set_data_blkaddr(dn, NEW_ADDR); + dn->data_blkaddr = NEW_ADDR; + + get_node_info(sbi, dn->nid, &ni); + set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); + + type = CURSEG_WARM_DATA; + + allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); + + /* direct IO doesn't use extent cache to maximize the performance */ + set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + update_extent_cache(new_blkaddr, dn); + clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + + dn->data_blkaddr = new_blkaddr; + return 0; +} + /* * This function should be used by the data read flow only where it * does not check the "create" flag that indicates block allocation. * The reason for this special functionality is to exploit VFS readahead * mechanism. */ -static int get_data_block_ro(struct inode *inode, sector_t iblock, +static int get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); unsigned int blkbits = inode->i_sb->s_blocksize_bits; unsigned maxblocks = bh_result->b_size >> blkbits; struct dnode_of_data dn; - pgoff_t pgofs; - int err; + int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; + pgoff_t pgofs, end_offset; + int err = 0, ofs = 1; + bool allocated = false; /* Get the page offset from the block offset(iblock) */ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); - if (check_extent_cache(inode, pgofs, bh_result)) { - trace_f2fs_get_data_block(inode, iblock, bh_result, 0); - return 0; - } + if (check_extent_cache(inode, pgofs, bh_result)) + goto out; + + if (create) + f2fs_lock_op(sbi); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); - if (err) { - trace_f2fs_get_data_block(inode, iblock, bh_result, err); - return (err == -ENOENT) ? 0 : err; + err = get_dnode_of_data(&dn, pgofs, mode); + if (err || dn.data_blkaddr == NEW_ADDR) { + if (err == -ENOENT) + err = 0; + goto unlock_out; } - /* It does not support data allocation */ - f2fs_bug_on(create); - - if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { - int i; - unsigned int end_offset; - - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : - ADDRS_PER_BLOCK; - - clear_buffer_new(bh_result); - - /* Give more consecutive addresses for the read ahead */ - for (i = 0; i < end_offset - dn.ofs_in_node; i++) - if (((datablock_addr(dn.node_page, - dn.ofs_in_node + i)) - != (dn.data_blkaddr + i)) || maxblocks == i) - break; + if (dn.data_blkaddr != NULL_ADDR) { map_bh(bh_result, inode->i_sb, dn.data_blkaddr); - bh_result->b_size = (((size_t)i) << blkbits); + } else if (create) { + err = __allocate_data_block(&dn); + if (err) + goto put_out; + allocated = true; + map_bh(bh_result, inode->i_sb, dn.data_blkaddr); + } else { + goto put_out; } + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + bh_result->b_size = (((size_t)1) << blkbits); + dn.ofs_in_node++; + pgofs++; + +get_next: + if (dn.ofs_in_node >= end_offset) { + if (allocated) + sync_inode_page(&dn); + allocated = false; + f2fs_put_dnode(&dn); + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, mode); + if (err || dn.data_blkaddr == NEW_ADDR) { + if (err == -ENOENT) + err = 0; + goto unlock_out; + } + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + } + + if (maxblocks > (bh_result->b_size >> blkbits)) { + block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + if (blkaddr == NULL_ADDR && create) { + err = __allocate_data_block(&dn); + if (err) + goto sync_out; + allocated = true; + blkaddr = dn.data_blkaddr; + } + /* Give more consecutive addresses for the read ahead */ + if (blkaddr == (bh_result->b_blocknr + ofs)) { + ofs++; + dn.ofs_in_node++; + pgofs++; + bh_result->b_size += (((size_t)1) << blkbits); + goto get_next; + } + } +sync_out: + if (allocated) + sync_inode_page(&dn); +put_out: f2fs_put_dnode(&dn); - trace_f2fs_get_data_block(inode, iblock, bh_result, 0); - return 0; +unlock_out: + if (create) + f2fs_unlock_op(sbi); +out: + trace_f2fs_get_data_block(inode, iblock, bh_result, err); + return err; } static int f2fs_read_data_page(struct file *file, struct page *page) { - return mpage_readpage(page, get_data_block_ro); + return mpage_readpage(page, get_data_block); } static int f2fs_read_data_pages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); + return mpage_readpages(mapping, pages, nr_pages, get_data_block); } int do_write_data_page(struct page *page, struct f2fs_io_info *fio) @@ -883,13 +960,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - - if (rw == WRITE) - return 0; - - /* Needs synchronization with the cleaner */ return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - get_data_block_ro); + get_data_block); } static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, @@ -928,7 +1000,7 @@ static int f2fs_set_data_page_dirty(struct page *page) static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { - return generic_block_bmap(mapping, block, get_data_block_ro); + return generic_block_bmap(mapping, block, get_data_block); } const struct address_space_operations f2fs_dblock_aops = { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1b05a628670a..8cbc5a6bf484 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1115,6 +1115,8 @@ void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void rewrite_node_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); +void allocate_data_block(struct f2fs_sb_info *, struct page *, + block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); void write_data_summaries(struct f2fs_sb_info *, block_t); void write_node_summaries(struct f2fs_sb_info *, block_t); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5b890ce74b15..9f8bdd02e3a8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -854,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type) return __get_segment_type_6(page, p_type); } -static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, struct f2fs_io_info *fio) +void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, int type) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int old_cursegno; - int type; - type = __get_segment_type(page, fio->type); curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); @@ -896,13 +894,22 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); mutex_unlock(&sit_i->sentry_lock); - if (fio->type == NODE) + if (page && IS_NODESEG(type)) fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + mutex_unlock(&curseg->curseg_mutex); +} + +static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, struct f2fs_io_info *fio) +{ + int type = __get_segment_type(page, fio->type); + + allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); + /* writeout dirty page into bdev */ f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); - - mutex_unlock(&curseg->curseg_mutex); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) From 5459aa9770fe2e8c6a660a0985ea454c42cfd8c1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 17 Dec 2013 17:28:41 +0900 Subject: [PATCH 55/95] f2fs: write dirty meta pages collectively This patch enhances writing dirty meta pages collectively in background. During the file data writes, it'd better avoid to write small dirty meta pages frequently. So let's give a chance to collect a number of dirty meta pages for a while. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f8c074961e0a..2fc3b6beaeb8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -107,18 +107,19 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - struct block_device *bdev = sbi->sb->s_bdev; + int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); long written; if (wbc->for_kupdate) return 0; - if (get_pages(sbi, F2FS_DIRTY_META) == 0) + /* collect a number of dirty meta pages and write together */ + if (get_pages(sbi, F2FS_DIRTY_META) < nrpages) return 0; /* if mounting is failed, skip writing node pages */ mutex_lock(&sbi->cp_mutex); - written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev)); + written = sync_meta_pages(sbi, META, nrpages); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write -= written; return 0; From ba0697ec98f0244ea180ce336f399d1a24eb7bf8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 19 Dec 2013 17:44:41 +0900 Subject: [PATCH 56/95] f2fs: add description about small_discards in document This patch adds a description about small_disacrds in the f2fs document. Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 1a94ac7fee86..02357e35d4f6 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -171,6 +171,12 @@ Files in /sys/fs/f2fs/ conduct checkpoint to reclaim the prefree segments to free segments. By default, 100 segments, 200MB. + max_small_discards This parameter controls the number of discard + commands that consist small blocks less than 2MB. + The candidates to be discarded are cached until + checkpoint is triggered, and issued during the + checkpoint. By default, it is disabled with 0. + ipu_policy This parameter controls the policy of in-place updates in f2fs. There are five policies: 0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR, From 940a6d34b31b96f0748a4b688a551a0890b2b229 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 20 Dec 2013 17:39:59 +0800 Subject: [PATCH 57/95] f2fs: move all the bio initialization into __bio_alloc Move all the bio initialization into __bio_alloc, and some minor cleanups are also added. v3: Use 'bool' rather than 'int' as Kim suggested. v2: Use 'is_read' rather than 'rw' as Yu Chao suggested. Remove the needless initialization of bio->bi_private. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 92 +++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 50 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a0950bcbf568..154a4f93a548 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -24,20 +24,6 @@ #include "segment.h" #include -/* - * Low-level block read/write IO operations. - */ -static struct bio *__bio_alloc(struct block_device *bdev, int npages) -{ - struct bio *bio; - - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - bio->bi_bdev = bdev; - bio->bi_private = NULL; - return bio; -} - static void f2fs_read_end_io(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -93,6 +79,24 @@ static void f2fs_write_end_io(struct bio *bio, int err) bio_put(bio); } +/* + * Low-level block read/write IO operations. + */ +static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, + int npages, bool is_read) +{ + struct bio *bio; + + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + + bio->bi_bdev = sbi->sb->s_bdev; + bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; + + return bio; +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -104,25 +108,26 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) rw = fio->rw | fio->rw_flag; if (is_read_io(rw)) { - trace_f2fs_submit_read_bio(io->sbi->sb, rw, fio->type, io->bio); + trace_f2fs_submit_read_bio(io->sbi->sb, rw, + fio->type, io->bio); submit_bio(rw, io->bio); - io->bio = NULL; - return; - } - trace_f2fs_submit_write_bio(io->sbi->sb, rw, fio->type, io->bio); - - /* - * META_FLUSH is only from the checkpoint procedure, and we should wait - * this metadata bio for FS consistency. - */ - if (fio->type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->bio->bi_private = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); } else { - submit_bio(rw, io->bio); + trace_f2fs_submit_write_bio(io->sbi->sb, rw, + fio->type, io->bio); + /* + * META_FLUSH is only from the checkpoint procedure, and we + * should wait this metadata bio for FS consistency. + */ + if (fio->type == META_FLUSH) { + DECLARE_COMPLETION_ONSTACK(wait); + io->bio->bi_private = &wait; + submit_bio(rw, io->bio); + wait_for_completion(&wait); + } else { + submit_bio(rw, io->bio); + } } + io->bio = NULL; } @@ -152,17 +157,12 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, block_t blk_addr, int rw) { - struct block_device *bdev = sbi->sb->s_bdev; struct bio *bio; trace_f2fs_submit_page_bio(page, blk_addr, rw); /* Allocate a new bio */ - bio = __bio_alloc(bdev, 1); - - /* Initialize the bio */ - bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - bio->bi_end_io = is_read_io(rw) ? f2fs_read_end_io : f2fs_write_end_io; + bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); @@ -178,17 +178,16 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, block_t blk_addr, struct f2fs_io_info *fio) { enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); - struct block_device *bdev = sbi->sb->s_bdev; struct f2fs_bio_info *io; - int bio_blocks; + bool is_read = is_read_io(fio->rw); - io = is_read_io(fio->rw) ? &sbi->read_io : &sbi->write_io[btype]; + io = is_read ? &sbi->read_io : &sbi->write_io[btype]; verify_block_addr(sbi, blk_addr); mutex_lock(&io->io_mutex); - if (!is_read_io(fio->rw)) + if (!is_read) inc_page_count(sbi, F2FS_WRITEBACK); if (io->bio && (io->last_block_in_bio != blk_addr - 1 || @@ -196,17 +195,10 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { - bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - io->bio = __bio_alloc(bdev, bio_blocks); - io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - io->bio->bi_end_io = is_read_io(fio->rw) ? f2fs_read_end_io : - f2fs_write_end_io; + int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); + + io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); io->fio = *fio; - /* - * The end_io will be assigned at the sumbission phase. - * Until then, let bio_add_page() merge consecutive IOs as much - * as possible. - */ } if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) < From 7e8f23081ab3a11de90d7389f2c6fd44676c8df9 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 20 Dec 2013 18:17:49 +0800 Subject: [PATCH 58/95] f2fs: remove the rw_flag domain from f2fs_io_info When using the f2fs_io_info in the low level, we still need to merge the rw and rw_flag, so use the rw to hold all the io flags directly, and remove the rw_flag field. ps.It is based on the previous patch: f2fs: move all the bio initialization into __bio_alloc Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +-- fs/f2fs/f2fs.h | 5 ++--- fs/f2fs/gc.c | 1 - fs/f2fs/node.c | 6 ++---- fs/f2fs/segment.c | 8 ++------ 5 files changed, 7 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 154a4f93a548..e46b5c52d2ed 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -105,7 +105,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; - rw = fio->rw | fio->rw_flag; + rw = fio->rw; if (is_read_io(rw)) { trace_f2fs_submit_read_bio(io->sbi->sb, rw, @@ -760,7 +760,6 @@ static int f2fs_write_data_page(struct page *page, struct f2fs_io_info fio = { .type = DATA, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC: WRITE, - .rw_flag = 0, }; if (page->index < end_index) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8cbc5a6bf484..42f28d4134c9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -368,9 +368,8 @@ enum page_type { }; struct f2fs_io_info { - enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ - int rw; /* contains R/RS/W/WS */ - int rw_flag; /* contains REQ_META/REQ_PRIO */ + enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ + int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ }; #define is_read_io(rw) (((rw) & 1) == READ) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 69c18e399014..599f546d042c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -523,7 +523,6 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC, - .rw_flag = 0, }; if (gc_type == BG_GC) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3565caf97005..0af0a715e367 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -94,8 +94,7 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) int i; struct f2fs_io_info fio = { .type = META, - .rw = READ_SYNC, - .rw_flag = REQ_META | REQ_PRIO + .rw = READ_SYNC | REQ_META | REQ_PRIO }; @@ -1581,8 +1580,7 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, int page_idx = start; struct f2fs_io_info fio = { .type = META, - .rw = READ_SYNC, - .rw_flag = REQ_META | REQ_PRIO + .rw = READ_SYNC | REQ_META | REQ_PRIO }; for (; page_idx < start + nrpages; page_idx++) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9f8bdd02e3a8..555ae7693ea0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -916,8 +916,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_io_info fio = { .type = META, - .rw = WRITE_SYNC, - .rw_flag = REQ_META | REQ_PRIO + .rw = WRITE_SYNC | REQ_META | REQ_PRIO }; set_page_writeback(page); @@ -931,7 +930,6 @@ void write_node_page(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info fio = { .type = NODE, .rw = WRITE_SYNC, - .rw_flag = 0 }; set_summary(&sum, nid, 0, 0); @@ -1018,7 +1016,6 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, struct f2fs_io_info fio = { .type = NODE, .rw = WRITE_SYNC, - .rw_flag = 0 }; curseg = CURSEG_I(sbi, type); @@ -1598,8 +1595,7 @@ static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages) int blkno = start; struct f2fs_io_info fio = { .type = META, - .rw = READ_SYNC, - .rw_flag = REQ_META | REQ_PRIO + .rw = READ_SYNC | REQ_META | REQ_PRIO }; for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) { From 4f4124d0b99682efa7307191a28ec050872d2079 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Dec 2013 18:02:14 +0800 Subject: [PATCH 59/95] f2fs: update several comments Update several comments: 1. use f2fs_{un}lock_op install of mutex_{un}lock_op. 2. update comment of get_data_block(). 3. update description of node offset. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 ++++++++------ fs/f2fs/dir.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 8 ++++---- fs/f2fs/node.h | 8 +++++++- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e46b5c52d2ed..0879d2aa97e6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -508,8 +508,8 @@ repeat: * Caller ensures that this data page is never allocated. * A new zero-filled data page is allocated in the page cache. * - * Also, caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). + * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op(). * Note that, npage is set only by make_empty_dir. */ struct page *get_new_data_page(struct inode *inode, @@ -595,10 +595,12 @@ static int __allocate_data_block(struct dnode_of_data *dn) } /* - * This function should be used by the data read flow only where it - * does not check the "create" flag that indicates block allocation. - * The reason for this special functionality is to exploit VFS readahead - * mechanism. + * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. + * If original data blocks are allocated, then give them to blockdev. + * Otherwise, + * a. preallocate requested block addresses + * b. do not use extent cache for better performance + * c. give the block addresses to blockdev */ static int get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 0cc26ba07c3b..28206109a293 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -432,8 +432,8 @@ next: } /* - * Caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). + * Caller should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op(). */ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 42f28d4134c9..ba91186823dd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -165,7 +165,7 @@ enum { LOOKUP_NODE, /* look up a node without readahead */ LOOKUP_NODE_RA, /* * look up a node with readahead called - * by get_datablock_ro. + * by get_data_block. */ }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0af0a715e367..e8fe52d6073c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -393,8 +393,8 @@ got: /* * Caller should call f2fs_put_dnode(dn). - * Also, it should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op() only if ro is not set RDONLY_NODE. + * Also, it should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op() only if ro is not set RDONLY_NODE. * In the case of RDONLY_NODE, we don't need to care about mutex. */ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) @@ -802,8 +802,8 @@ int truncate_xattr_node(struct inode *inode, struct page *page) } /* - * Caller should grab and release a mutex by calling mutex_lock_op() and - * mutex_unlock_op(). + * Caller should grab and release a rwsem by calling f2fs_lock_op() and + * f2fs_unlock_op(). */ void remove_inode_page(struct inode *inode) { diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3496bb3e15dc..c4c79885c993 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -224,7 +224,13 @@ static inline block_t next_blkaddr_of_node(struct page *node_page) * | `- direct node (5 + N => 5 + 2N - 1) * `- double indirect node (5 + 2N) * `- indirect node (6 + 2N) - * `- direct node (x(N + 1)) + * `- direct node + * ...... + * `- indirect node ((6 + 2N) + x(N + 1)) + * `- direct node + * ...... + * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) + * `- direct node */ static inline bool IS_DNODE(struct page *node_page) { From deead09009fc5136185fe95026c395b5c2337e1f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 Dec 2013 18:03:28 +0800 Subject: [PATCH 60/95] f2fs: avoid to set wrong pino of inode when rename dir When we rename a dir to new name which is not exist previous, we will set pino of parent inode with ino of child inode in f2fs_set_link. It destroy consistency of pino, it should be fixed. Thanks for previous work of Shu Tan. Signed-off-by: Shu Tan Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 --- fs/f2fs/namei.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 28206109a293..07ad850bbf97 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -259,9 +259,6 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); - /* update parent inode number before releasing dentry page */ - F2FS_I(inode)->i_pino = dir->i_ino; - f2fs_put_page(page, 1); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 575adac17f8b..a68838dae335 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -424,6 +424,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_set_link(new_dir, new_entry, new_page, old_inode); + F2FS_I(old_inode)->i_pino = new_dir->i_ino; new_inode->i_ctime = CURRENT_TIME; if (old_dir_entry) @@ -457,6 +458,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir != new_dir) { f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + F2FS_I(old_inode)->i_pino = new_dir->i_ino; } else { kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); From d96b143151a11820ee3eee552554209f2453799e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 23 Dec 2013 11:12:21 +0800 Subject: [PATCH 61/95] f2fs: check filename length in recover_dentry In current flow, we will get Null return value of f2fs_find_entry in recover_dentry when name.len is bigger than F2FS_NAME_LEN, and then we still add this inode into its dir entry. To avoid this situation, we must check filename length before we use it. Another point is that we could remove the code of checking filename length In f2fs_find_entry, because f2fs_lookup will be called previously to ensure of validity of filename length. V2: o add WARN_ON() as Jaegeuk Kim suggested. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 --- fs/f2fs/recovery.c | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 07ad850bbf97..f0b463049444 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -190,9 +190,6 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; - if (unlikely(namelen > F2FS_NAME_LEN)) - return NULL; - if (npages == 0) return NULL; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a3f4542160cf..4d411a26b85d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -62,6 +62,12 @@ static int recover_dentry(struct page *ipage, struct inode *inode) name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; + + if (unlikely(name.len > F2FS_NAME_LEN)) { + WARN_ON(1); + err = -ENAMETOOLONG; + goto out; + } retry: de = f2fs_find_entry(dir, &name, &page); if (de && inode->i_ino == le32_to_cpu(de->ino)) From 58bfaf44df58082c72882b235cae611c975537d4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 26 Dec 2013 16:30:41 +0900 Subject: [PATCH 62/95] f2fs: introduce F2FS_INODE macro to get f2fs_inode This patch introduces F2FS_INODE that returns struct f2fs_inode * from the inode page. By using this macro, we can remove unnecessary casting codes like below. struct f2fs_inode *ri = &F2FS_NODE(inode_page)->i; -> struct f2fs_inode *ri = F2FS_INODE(inode_page); Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 8 ++++---- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/inode.c | 8 ++------ fs/f2fs/node.c | 30 +++++++++++++++--------------- fs/f2fs/recovery.c | 6 ++---- 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f0b463049444..6da77e5c753b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -261,12 +261,12 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, static void init_dent_inode(const struct qstr *name, struct page *ipage) { - struct f2fs_node *rn; + struct f2fs_inode *ri; /* copy name info. to this inode page */ - rn = F2FS_NODE(ipage); - rn->i.i_namelen = cpu_to_le32(name->len); - memcpy(rn->i.i_name, name->name, name->len); + ri = F2FS_INODE(ipage); + ri->i_namelen = cpu_to_le32(name->len); + memcpy(ri->i_name, name->name, name->len); set_page_dirty(ipage); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ba91186823dd..1a06f0aaa7d9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -498,6 +498,11 @@ static inline struct f2fs_node *F2FS_NODE(struct page *page) return (struct f2fs_node *)page_address(page); } +static inline struct f2fs_inode *F2FS_INODE(struct page *page) +{ + return &((struct f2fs_node *)page_address(page))->i; +} + static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) { return (struct f2fs_nm_info *)(sbi->nm_info); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d0eaa9faeca0..a91f45177cd9 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -67,7 +67,6 @@ static int do_read_inode(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; - struct f2fs_node *rn; struct f2fs_inode *ri; /* Check if ino is within scope */ @@ -81,8 +80,7 @@ static int do_read_inode(struct inode *inode) if (IS_ERR(node_page)) return PTR_ERR(node_page); - rn = F2FS_NODE(node_page); - ri = &(rn->i); + ri = F2FS_INODE(node_page); inode->i_mode = le16_to_cpu(ri->i_mode); i_uid_write(inode, le32_to_cpu(ri->i_uid)); @@ -175,13 +173,11 @@ bad_inode: void update_inode(struct inode *inode, struct page *node_page) { - struct f2fs_node *rn; struct f2fs_inode *ri; f2fs_wait_on_page_writeback(node_page, NODE, false); - rn = F2FS_NODE(node_page); - ri = &(rn->i); + ri = F2FS_INODE(node_page); ri->i_mode = cpu_to_le16(inode->i_mode); ri->i_advise = F2FS_I(inode)->i_advise; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e8fe52d6073c..9405a17a671e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -684,7 +684,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) int err = 0, cont = 1; int level, offset[4], noffset[4]; unsigned int nofs = 0; - struct f2fs_node *rn; + struct f2fs_inode *ri; struct dnode_of_data dn; struct page *page; @@ -701,7 +701,7 @@ restart: set_new_dnode(&dn, inode, page, NULL, 0); unlock_page(page); - rn = F2FS_NODE(page); + ri = F2FS_INODE(page); switch (level) { case 0: case 1: @@ -711,7 +711,7 @@ restart: nofs = noffset[1]; if (!offset[level - 1]) goto skip_partial; - err = truncate_partial_nodes(&dn, &rn->i, offset, level); + err = truncate_partial_nodes(&dn, ri, offset, level); if (err < 0 && err != -ENOENT) goto fail; nofs += 1 + NIDS_PER_BLOCK; @@ -720,7 +720,7 @@ restart: nofs = 5 + 2 * NIDS_PER_BLOCK; if (!offset[level - 1]) goto skip_partial; - err = truncate_partial_nodes(&dn, &rn->i, offset, level); + err = truncate_partial_nodes(&dn, ri, offset, level); if (err < 0 && err != -ENOENT) goto fail; break; @@ -730,7 +730,7 @@ restart: skip_partial: while (cont) { - dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); + dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); switch (offset[0]) { case NODE_DIR1_BLOCK: case NODE_DIR2_BLOCK: @@ -753,14 +753,14 @@ skip_partial: if (err < 0 && err != -ENOENT) goto fail; if (offset[1] == 0 && - rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { + ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); if (unlikely(page->mapping != node_mapping)) { f2fs_put_page(page, 1); goto restart; } wait_on_page_writeback(page); - rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; + ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); unlock_page(page); } @@ -1533,7 +1533,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) { struct address_space *mapping = sbi->node_inode->i_mapping; - struct f2fs_node *src, *dst; + struct f2fs_inode *src, *dst; nid_t ino = ino_of_node(page); struct node_info old_ni, new_ni; struct page *ipage; @@ -1549,14 +1549,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); - src = F2FS_NODE(page); - dst = F2FS_NODE(ipage); + src = F2FS_INODE(page); + dst = F2FS_INODE(ipage); - memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); - dst->i.i_size = 0; - dst->i.i_blocks = cpu_to_le64(1); - dst->i.i_links = cpu_to_le32(1); - dst->i.i_xattr_nid = 0; + memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src); + dst->i_size = 0; + dst->i_blocks = cpu_to_le64(1); + dst->i_links = cpu_to_le32(1); + dst->i_xattr_nid = 0; new_ni = old_ni; new_ni.ino = ino; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4d411a26b85d..96e690b6f0fa 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -40,8 +40,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, static int recover_dentry(struct page *ipage, struct inode *inode) { - struct f2fs_node *raw_node = F2FS_NODE(ipage); - struct f2fs_inode *raw_inode = &(raw_node->i); + struct f2fs_inode *raw_inode = F2FS_INODE(ipage); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; struct qstr name; @@ -105,8 +104,7 @@ out: static int recover_inode(struct inode *inode, struct page *node_page) { - struct f2fs_node *raw_node = F2FS_NODE(node_page); - struct f2fs_inode *raw_inode = &(raw_node->i); + struct f2fs_inode *raw_inode = F2FS_INODE(node_page); if (!IS_INODE(node_page)) return 0; From 1ec79083b2d4614d9dbaea67b5f55b60d7137a2d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 26 Dec 2013 16:55:22 +0900 Subject: [PATCH 63/95] f2fs: should put the dnode when NEW_ADDR is detected When get_dnode_of_data() in get_data_block() returns a successful dnode, we should put the dnode. But, previously, if its data block address is equal to NEW_ADDR, we didn't do that, resulting in a deadlock condition. So, this patch splits original error conditions with this case, and then calls f2fs_put_dnode before finishing the function. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0879d2aa97e6..991e36835df8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -626,11 +626,13 @@ static int get_data_block(struct inode *inode, sector_t iblock, /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, mode); - if (err || dn.data_blkaddr == NEW_ADDR) { + if (err) { if (err == -ENOENT) err = 0; goto unlock_out; } + if (dn.data_blkaddr == NEW_ADDR) + goto put_out; if (dn.data_blkaddr != NULL_ADDR) { map_bh(bh_result, inode->i_sb, dn.data_blkaddr); @@ -659,11 +661,14 @@ get_next: set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, mode); - if (err || dn.data_blkaddr == NEW_ADDR) { + if (err) { if (err == -ENOENT) err = 0; goto unlock_out; } + if (dn.data_blkaddr == NEW_ADDR) + goto put_out; + end_offset = IS_INODE(dn.node_page) ? ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; } From 944fcfc18445b22d7ad1953a6effcfbdc4ffec74 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 26 Dec 2013 20:15:09 +0900 Subject: [PATCH 64/95] f2fs: check the blocksize before calling generic_direct_IO path The f2fs supports 4KB block size. If user requests dwrite with under 4KB data, it allocates a new 4KB data block. However, f2fs doesn't add zero data into the untouched data area inside the newly allocated data block. This incurs an error during the xfstest #263 test as follow. 263 12s ... [failed, exit status 1] - output mismatch (see 263.out.bad) --- 263.out 2013-03-09 03:37:15.043967603 +0900 +++ 263.out.bad 2013-12-27 04:20:39.230203114 +0900 @@ -1,3 +1,976 @@ QA output created by 263 fsx -N 10000 -o 8192 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z -fsx -N 10000 -o 128000 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z +fsx -N 10000 -o 8192 -l 500000 -r PSIZE -t BSIZE -w BSIZE -Z +truncating to largest ever: 0x12a00 +truncating to largest ever: 0x75400 +fallocating to largest ever: 0x79cbf ... (Run 'diff -u 263.out 263.out.bad' to see the entire diff) Ran: 263 Failures: 263 Failed 1 of 1 tests It turns out that, when the test tries to write 2KB data with dio, the new dio path allocates 4KB data block without filling zero data inside the remained 2KB area. Finally, the output file contains a garbage data for that region. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 991e36835df8..e0965b43d16e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -953,11 +953,33 @@ static int f2fs_write_end(struct file *file, return copied; } +static int check_direct_IO(struct inode *inode, int rw, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) +{ + unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; + int i; + + if (rw == READ) + return 0; + + if (offset & blocksize_mask) + return -EINVAL; + + for (i = 0; i < nr_segs; i++) + if (iov[i].iov_len & blocksize_mask) + return -EINVAL; + return 0; +} + static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + + if (check_direct_IO(inode, rw, iov, offset, nr_segs)) + return 0; + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, get_data_block); } From 0d47c1adc2a1f1e4f4673f122a8328c90c58e232 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 26 Dec 2013 18:24:19 +0800 Subject: [PATCH 65/95] f2fs: convert max_orphans to a field of f2fs_sb_info Previously, we need to calculate the max orphan num when we try to acquire an orphan inode, but it's a stable value since the super block was inited. So converting it to a field of f2fs_sb_info and use it directly when needed seems a better choose. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 20 ++++++++++---------- fs/f2fs/f2fs.h | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2fc3b6beaeb8..0d78bbe79f98 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -194,23 +194,15 @@ const struct address_space_operations f2fs_meta_aops = { int acquire_orphan_inode(struct f2fs_sb_info *sbi) { - unsigned int max_orphans; int err = 0; - /* - * considering 512 blocks in a segment 8 blocks are needed for cp - * and log segment summaries. Remaining blocks are used to keep - * orphan entries with the limitation one reserved segment - * for cp pack we can have max 1020*504 orphan entries - */ - max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) - * F2FS_ORPHANS_PER_BLOCK; mutex_lock(&sbi->orphan_inode_mutex); - if (unlikely(sbi->n_orphans >= max_orphans)) + if (unlikely(sbi->n_orphans >= sbi->max_orphans)) err = -ENOSPC; else sbi->n_orphans++; mutex_unlock(&sbi->orphan_inode_mutex); + return err; } @@ -834,6 +826,14 @@ void init_orphan_info(struct f2fs_sb_info *sbi) mutex_init(&sbi->orphan_inode_mutex); INIT_LIST_HEAD(&sbi->orphan_inode_list); sbi->n_orphans = 0; + /* + * considering 512 blocks in a segment 8 blocks are needed for cp + * and log segment summaries. Remaining blocks are used to keep + * orphan entries with the limitation one reserved segment + * for cp pack we can have max 1020*504 orphan entries + */ + sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) + * F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1a06f0aaa7d9..36211cb16263 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -414,6 +414,7 @@ struct f2fs_sb_info { struct list_head orphan_inode_list; /* orphan inode list */ struct mutex orphan_inode_mutex; /* for orphan inode list */ unsigned int n_orphans; /* # of orphan inodes */ + unsigned int max_orphans; /* max orphan inodes */ /* for directory inode management */ struct list_head dir_inode_list; /* dir inode list */ From e18c65b2ac91aa59f89333da595d5155184f76cf Mon Sep 17 00:00:00 2001 From: Huajun Li Date: Sun, 10 Nov 2013 23:13:19 +0800 Subject: [PATCH 66/95] f2fs: key functions to handle inline data Functions to implement inline data read/write, and move inline data to normal data block when file size exceeds inline data limitation. Signed-off-by: Huajun Li Signed-off-by: Haicheng Li Signed-off-by: Weihong Xu Signed-off-by: Jaegeuk Kim --- fs/f2fs/Makefile | 2 +- fs/f2fs/f2fs.h | 8 +++ fs/f2fs/inline.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 fs/f2fs/inline.c diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 27a0820340b9..2e35da12d292 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o -f2fs-y := dir.o file.o inode.o namei.o hash.o super.o +f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 36211cb16263..b01ccaa73b23 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1296,4 +1296,12 @@ extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; +/* + * inline.c + */ +inline int f2fs_has_inline_data(struct inode *); +bool f2fs_may_inline(struct inode *); +int f2fs_read_inline_data(struct inode *, struct page *); +int f2fs_convert_inline_data(struct inode *, struct page *, unsigned); +int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); #endif diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c new file mode 100644 index 000000000000..62c72aa84acc --- /dev/null +++ b/fs/f2fs/inline.c @@ -0,0 +1,176 @@ +/* + * fs/f2fs/inline.c + * Copyright (c) 2013, Intel Corporation + * Authors: Huajun Li + * Haicheng Li + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include "f2fs.h" + +inline int f2fs_has_inline_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); +} + +bool f2fs_may_inline(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + block_t nr_blocks; + loff_t i_size; + + if (!test_opt(sbi, INLINE_DATA)) + return false; + + nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; + if (inode->i_blocks > nr_blocks) + return false; + + i_size = i_size_read(inode); + if (i_size > MAX_INLINE_DATA) + return false; + + return true; +} + +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *ipage; + void *src_addr, *dst_addr; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + zero_user_segment(page, INLINE_DATA_OFFSET, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + + /* Copy the whole inline data block */ + src_addr = inline_data_addr(ipage); + dst_addr = kmap(page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + kunmap(page); + f2fs_put_page(ipage, 1); + + SetPageUptodate(page); + unlock_page(page); + + return 0; +} + +static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +{ + int err; + struct page *ipage; + struct dnode_of_data dn; + void *src_addr, *dst_addr; + block_t new_blk_addr; + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_io_info fio = { + .type = DATA, + .rw = WRITE_SYNC | REQ_PRIO, + }; + + f2fs_lock_op(sbi); + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + /* + * i_addr[0] is not used for inline data, + * so reserving new block will not destroy inline data + */ + set_new_dnode(&dn, inode, ipage, ipage, 0); + err = f2fs_reserve_block(&dn, 0); + if (err) { + f2fs_put_page(ipage, 1); + f2fs_unlock_op(sbi); + return err; + } + + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + + /* Copy the whole inline data block */ + src_addr = inline_data_addr(ipage); + dst_addr = kmap(page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + kunmap(page); + + /* write data page to try to make data consistent */ + set_page_writeback(page); + write_data_page(page, &dn, &new_blk_addr, &fio); + update_extent_cache(new_blk_addr, &dn); + f2fs_wait_on_page_writeback(page, DATA, true); + + /* clear inline data and flag after data writeback */ + zero_user_segment(ipage, INLINE_DATA_OFFSET, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + + sync_inode_page(&dn); + f2fs_put_page(ipage, 1); + f2fs_unlock_op(sbi); + + return err; +} + +int f2fs_convert_inline_data(struct inode *inode, + struct page *p, unsigned flags) +{ + int err; + struct page *page; + + if (!p || p->index) { + page = grab_cache_page_write_begin(inode->i_mapping, 0, flags); + if (IS_ERR(page)) + return PTR_ERR(page); + } else { + page = p; + } + + err = __f2fs_convert_inline_data(inode, page); + + if (!p || p->index) + f2fs_put_page(page, 1); + + return err; +} + +int f2fs_write_inline_data(struct inode *inode, + struct page *page, unsigned size) +{ + void *src_addr, *dst_addr; + struct page *ipage; + struct dnode_of_data dn; + int err; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); + if (err) + return err; + ipage = dn.inode_page; + + zero_user_segment(ipage, INLINE_DATA_OFFSET, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + src_addr = kmap(page); + dst_addr = inline_data_addr(ipage); + memcpy(dst_addr, src_addr, size); + kunmap(page); + + /* Release the first data block if it is allocated */ + if (!f2fs_has_inline_data(inode)) { + truncate_data_blocks_range(&dn, 1); + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + } + + sync_inode_page(&dn); + f2fs_put_dnode(&dn); + + return 0; +} From 9ffe0fb5f3bbd01c766162bb17a62dee0df7e125 Mon Sep 17 00:00:00 2001 From: Huajun Li Date: Sun, 10 Nov 2013 23:13:20 +0800 Subject: [PATCH 67/95] f2fs: handle inline data operations Hook inline data read/write, truncate, fallocate, setattr, etc. Files need meet following 2 requirement to inline: 1) file size is not greater than MAX_INLINE_DATA; 2) file doesn't pre-allocate data blocks by fallocate(). FI_INLINE_DATA will not be set while creating a new regular inode because most of the files are bigger than ~3.4K. Set FI_INLINE_DATA only when data is submitted to block layer, ranther than set it while creating a new inode, this also avoids converting data from inline to normal data block and vice versa. While writting inline data to inode block, the first data block should be released if the file has a block indexed by i_addr[0]. On the other hand, when a file operation is appied to a file with inline data, we need to test if this file can remain inline by doing this operation, otherwise it should be convert into normal file by reserving a new data block, copying inline data to this new block and clear FI_INLINE_DATA flag. Because reserve a new data block here will make use of i_addr[0], if we save inline data in i_addr[0..872], then the first 4 bytes would be overwriten. This problem can be avoided simply by not using i_addr[0] for inline data. Signed-off-by: Huajun Li Signed-off-by: Haicheng Li Signed-off-by: Weihong Xu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/file.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 86 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e0965b43d16e..bf39eed2442f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -706,13 +706,28 @@ out: static int f2fs_read_data_page(struct file *file, struct page *page) { - return mpage_readpage(page, get_data_block); + struct inode *inode = page->mapping->host; + int ret; + + /* If the file has inline data, try to read it directlly */ + if (f2fs_has_inline_data(inode)) + ret = f2fs_read_inline_data(inode, page); + else + ret = mpage_readpage(page, get_data_block); + + return ret; } static int f2fs_read_data_pages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { + struct inode *inode = file->f_mapping->host; + + /* If the file has inline data, skip readpages */ + if (f2fs_has_inline_data(inode)) + return 0; + return mpage_readpages(mapping, pages, nr_pages, get_data_block); } @@ -761,7 +776,7 @@ static int f2fs_write_data_page(struct page *page, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_CACHE_SHIFT; - unsigned offset; + unsigned offset = 0; bool need_balance_fs = false; int err = 0; struct f2fs_io_info fio = { @@ -799,7 +814,15 @@ write: err = do_write_data_page(page, &fio); } else { f2fs_lock_op(sbi); - err = do_write_data_page(page, &fio); + + if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) { + err = f2fs_write_inline_data(inode, page, offset); + f2fs_unlock_op(sbi); + goto out; + } else { + err = do_write_data_page(page, &fio); + } + f2fs_unlock_op(sbi); need_balance_fs = true; } @@ -888,6 +911,15 @@ repeat: return -ENOMEM; *pagep = page; + if ((pos + len) < MAX_INLINE_DATA) { + if (f2fs_has_inline_data(inode)) + goto inline_data; + } else if (f2fs_has_inline_data(inode)) { + err = f2fs_convert_inline_data(inode, page, flags); + if (err) + return err; + } + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, index); @@ -897,7 +929,7 @@ repeat: f2fs_put_page(page, 1); return err; } - +inline_data: if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; @@ -913,7 +945,10 @@ repeat: if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + if (f2fs_has_inline_data(inode)) + err = f2fs_read_inline_data(inode, page); + else + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); if (err) return err; @@ -977,6 +1012,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + /* Let buffer I/O handle the inline data case. */ + if (f2fs_has_inline_data(inode)) + return 0; + if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5accc964368d..dd80e725acb6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -257,8 +257,7 @@ static int truncate_blocks(struct inode *inode, u64 from) unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; - int count = 0; - int err; + int count = 0, err = 0; trace_f2fs_truncate_blocks_enter(inode, from); @@ -266,6 +265,10 @@ static int truncate_blocks(struct inode *inode, u64 from) ((from + blocksize - 1) >> (sbi->log_blocksize)); f2fs_lock_op(sbi); + + if (f2fs_has_inline_data(inode)) + goto done; + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { @@ -292,6 +295,7 @@ static int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +done: f2fs_unlock_op(sbi); /* lastly zero out the first data page */ @@ -367,8 +371,17 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { + if (f2fs_has_inline_data(inode) && + (attr->ia_size > MAX_INLINE_DATA)) { + unsigned flags = AOP_FLAG_NOFS; + err = f2fs_convert_inline_data(inode, NULL, flags); + if (err) + return err; + } + truncate_setsize(inode, attr->ia_size); - f2fs_truncate(inode); + if (!f2fs_has_inline_data(inode)) + f2fs_truncate(inode); f2fs_balance_fs(F2FS_SB(inode->i_sb)); } @@ -450,6 +463,26 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t off_start, off_end; int ret = 0; + if (f2fs_has_inline_data(inode)) { + struct page *page; + unsigned flags = AOP_FLAG_NOFS; + page = grab_cache_page_write_begin(inode->i_mapping, 0, flags); + if (IS_ERR(page)) + return PTR_ERR(page); + if (offset + len > MAX_INLINE_DATA) { + ret = f2fs_convert_inline_data(inode, page, flags); + f2fs_put_page(page, 1); + if (ret) + return ret; + } else { + zero_user_segment(page, offset, offset + len); + SetPageUptodate(page); + set_page_dirty(page); + f2fs_put_page(page, 1); + return ret; + } + } + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -496,6 +529,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t off_start, off_end; int ret = 0; + if (f2fs_has_inline_data(inode) && (offset + len > MAX_INLINE_DATA)) { + ret = f2fs_convert_inline_data(inode, NULL, AOP_FLAG_NOFS); + if (ret) + return ret; + } + ret = inode_newsize_ok(inode, (len + offset)); if (ret) return ret; From e4024e86c28d9b7b297b35d3a697faad44e1e19c Mon Sep 17 00:00:00 2001 From: Huajun Li Date: Sun, 10 Nov 2013 23:13:21 +0800 Subject: [PATCH 68/95] f2fs: update f2fs Documentation This patch describes the inline_data support in f2fs document. Signed-off-by: Huajun Li Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 02357e35d4f6..998e698cf455 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -120,6 +120,8 @@ active_logs=%u Support configuring the number of active logs. In the disable_ext_identify Disable the extension list configured by mkfs, so f2fs does not aware of cold files such as media files. inline_xattr Enable the inline xattrs feature. +inline_data Enable the inline data feature: New created small(<~3.4k) + files can be written into inode block. ================================================================================ DEBUGFS ENTRIES From f185ff979f97943c9d4966ed4edc9819c3342c5b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Dec 2013 11:01:54 +0900 Subject: [PATCH 69/95] f2fs: don't need to get f2fs_lock_op for the inline_data test This patch locates checking the inline_data prior to calling f2fs_lock_op() in truncate_blocks(), since getting the lock is unnecessary. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index dd80e725acb6..7ef2d6af24ce 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -261,14 +261,14 @@ static int truncate_blocks(struct inode *inode, u64 from) trace_f2fs_truncate_blocks_enter(inode, from); + if (f2fs_has_inline_data(inode)) + goto done; + free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode)) - goto done; - set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { @@ -295,9 +295,8 @@ static int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); -done: f2fs_unlock_op(sbi); - +done: /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); From 8230a0a49fc3f587644b284e51ac092607aa74bf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Dec 2013 11:13:21 +0900 Subject: [PATCH 70/95] f2fs: convert inline_data for punch_hole In the punch_hole(), let's convert inline_data all the time for simplicity and to avoid potential deadlock conditions. It is pretty much not a big deal to do this. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7ef2d6af24ce..f64a1c8291af 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -462,25 +462,9 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t off_start, off_end; int ret = 0; - if (f2fs_has_inline_data(inode)) { - struct page *page; - unsigned flags = AOP_FLAG_NOFS; - page = grab_cache_page_write_begin(inode->i_mapping, 0, flags); - if (IS_ERR(page)) - return PTR_ERR(page); - if (offset + len > MAX_INLINE_DATA) { - ret = f2fs_convert_inline_data(inode, page, flags); - f2fs_put_page(page, 1); - if (ret) - return ret; - } else { - zero_user_segment(page, offset, offset + len); - SetPageUptodate(page); - set_page_dirty(page); - f2fs_put_page(page, 1); - return ret; - } - } + ret = f2fs_convert_inline_data(inode, NULL, AOP_FLAG_NOFS); + if (ret) + return ret; pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; From 26f466f4a948ddc765f9b474ad6e0bdb94fb1a66 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Dec 2013 12:21:49 +0900 Subject: [PATCH 71/95] f2fs: call f2fs_put_page at the error case In f2fs_write_begin(), if f2fs_conver_inline_data() returns an error like -ENOSPC, f2fs should call f2fs_put_page(). Otherwise, it is remained as a locked page, resulting in the following bug. [] sleep_on_page+0xe/0x20 [] __lock_page+0x67/0x70 [] truncate_inode_pages_range+0x368/0x5d0 [] truncate_inode_pages+0x15/0x20 [] truncate_pagecache+0x4b/0x70 [] truncate_setsize+0x12/0x20 [] f2fs_setattr+0x72/0x270 [f2fs] [] notify_change+0x213/0x400 [] do_truncate+0x66/0xa0 [] vfs_truncate+0x191/0x1b0 [] do_sys_truncate+0x5c/0xa0 [] SyS_truncate+0xe/0x10 [] system_call_fastpath+0x16/0x1b [] 0xffffffffffffffff Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bf39eed2442f..253e6633dbf6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -916,8 +916,10 @@ repeat: goto inline_data; } else if (f2fs_has_inline_data(inode)) { err = f2fs_convert_inline_data(inode, page, flags); - if (err) + if (err) { + f2fs_put_page(page, 1); return err; + } } f2fs_lock_op(sbi); From 9e09fc855dd6f6ed510b3db7f3c3c1dd73631ac7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Dec 2013 12:28:59 +0900 Subject: [PATCH 72/95] f2fs: refactor f2fs_convert_inline_data Change log from v1: o handle NULL pointer of grab_cache_page_write_begin() pointed by Chao Yu. This patch refactors f2fs_convert_inline_data to check a couple of conditions internally for deciding whether it needs to convert inline_data or not. So, the new f2fs_convert_inline_data initially checks: 1) f2fs_has_inline_data(), and 2) the data size to be changed. If the inode has inline_data but the size to fill is less than MAX_INLINE_DATA, then we don't need to convert the inline_data with data allocation. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 ++++++---------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 25 +++++++++---------------- fs/f2fs/inline.c | 26 ++++++++++++-------------- 4 files changed, 28 insertions(+), 41 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 253e6633dbf6..fc7a28c5ad23 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -906,21 +906,17 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_balance_fs(sbi); repeat: + err = f2fs_convert_inline_data(inode, pos + len); + if (err) + return err; + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; *pagep = page; - if ((pos + len) < MAX_INLINE_DATA) { - if (f2fs_has_inline_data(inode)) - goto inline_data; - } else if (f2fs_has_inline_data(inode)) { - err = f2fs_convert_inline_data(inode, page, flags); - if (err) { - f2fs_put_page(page, 1); - return err; - } - } + if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) + goto inline_data; f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b01ccaa73b23..af35039c38f8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1302,6 +1302,6 @@ extern const struct inode_operations f2fs_special_inode_operations; inline int f2fs_has_inline_data(struct inode *); bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, struct page *, unsigned); +int f2fs_convert_inline_data(struct inode *, pgoff_t); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f64a1c8291af..68dd7bfce1a1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -370,17 +370,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - if (f2fs_has_inline_data(inode) && - (attr->ia_size > MAX_INLINE_DATA)) { - unsigned flags = AOP_FLAG_NOFS; - err = f2fs_convert_inline_data(inode, NULL, flags); - if (err) - return err; - } + err = f2fs_convert_inline_data(inode, attr->ia_size); + if (err) + return err; truncate_setsize(inode, attr->ia_size); - if (!f2fs_has_inline_data(inode)) - f2fs_truncate(inode); + f2fs_truncate(inode); f2fs_balance_fs(F2FS_SB(inode->i_sb)); } @@ -462,7 +457,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t off_start, off_end; int ret = 0; - ret = f2fs_convert_inline_data(inode, NULL, AOP_FLAG_NOFS); + ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); if (ret) return ret; @@ -512,16 +507,14 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t off_start, off_end; int ret = 0; - if (f2fs_has_inline_data(inode) && (offset + len > MAX_INLINE_DATA)) { - ret = f2fs_convert_inline_data(inode, NULL, AOP_FLAG_NOFS); - if (ret) - return ret; - } - ret = inode_newsize_ok(inode, (len + offset)); if (ret) return ret; + ret = f2fs_convert_inline_data(inode, offset + len); + if (ret) + return ret; + pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 62c72aa84acc..e8891aa3ab8c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -101,6 +101,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) dst_addr = kmap(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap(page); + SetPageUptodate(page); /* write data page to try to make data consistent */ set_page_writeback(page); @@ -120,25 +121,22 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) return err; } -int f2fs_convert_inline_data(struct inode *inode, - struct page *p, unsigned flags) +int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) { - int err; struct page *page; + int err; - if (!p || p->index) { - page = grab_cache_page_write_begin(inode->i_mapping, 0, flags); - if (IS_ERR(page)) - return PTR_ERR(page); - } else { - page = p; - } + if (!f2fs_has_inline_data(inode)) + return 0; + else if (to_size <= MAX_INLINE_DATA) + return 0; + + page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS); + if (!page) + return -ENOMEM; err = __f2fs_convert_inline_data(inode, page); - - if (!p || p->index) - f2fs_put_page(page, 1); - + f2fs_put_page(page, 1); return err; } From 0dbdc2ae9bba0a358816cc4a22e41a6ef16db8a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 26 Nov 2013 11:08:57 +0900 Subject: [PATCH 73/95] f2fs: add the number of inline_data files to status info This patch adds the number of inline_data files into the status information. Note that the number is reset whenever the filesystem is newly mounted. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 +++ fs/f2fs/f2fs.h | 22 ++++++++++++++++++++-- fs/f2fs/inline.c | 7 ++----- fs/f2fs/inode.c | 1 + 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 61adbcbe9b87..711a0d4d1cba 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -45,6 +45,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->valid_count = valid_user_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); + si->inline_inode = sbi->inline_inode; si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -200,6 +201,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "Other: %u)\n - Data: %u\n", si->valid_node_count - si->valid_inode_count, si->valid_count - si->valid_node_count); + seq_printf(s, " - Inline_data Inode: %u\n", + si->inline_inode); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index af35039c38f8..0b6badadbfd8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -460,6 +460,7 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ + int inline_inode; /* # of inline_data inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -992,6 +993,11 @@ static inline int inline_xattr_size(struct inode *inode) return 0; } +static inline int f2fs_has_inline_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); +} + static inline void *inline_data_addr(struct page *page) { struct f2fs_inode *ri; @@ -1201,7 +1207,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc; + int bg_gc, inline_inode; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -1230,6 +1236,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) #define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) #define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) +#define stat_inc_inline_inode(inode) \ + do { \ + if (f2fs_has_inline_data(inode)) \ + ((F2FS_SB(inode->i_sb))->inline_inode++); \ + } while (0) +#define stat_dec_inline_inode(inode) \ + do { \ + if (f2fs_has_inline_data(inode)) \ + ((F2FS_SB(inode->i_sb))->inline_inode--); \ + } while (0) + #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -1273,6 +1290,8 @@ void f2fs_destroy_root_stats(void); #define stat_dec_dirty_dir(sbi) #define stat_inc_total_hit(sb) #define stat_inc_read_hit(sb) +#define stat_inc_inline_inode(inode) +#define stat_dec_inline_inode(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) @@ -1299,7 +1318,6 @@ extern const struct inode_operations f2fs_special_inode_operations; /* * inline.c */ -inline int f2fs_has_inline_data(struct inode *); bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_data(struct inode *, pgoff_t); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e8891aa3ab8c..3c9261cd215f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -13,11 +13,6 @@ #include "f2fs.h" -inline int f2fs_has_inline_data(struct inode *inode) -{ - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); -} - bool f2fs_may_inline(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -113,6 +108,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + stat_dec_inline_inode(inode); sync_inode_page(&dn); f2fs_put_page(ipage, 1); @@ -165,6 +161,7 @@ int f2fs_write_inline_data(struct inode *inode, if (!f2fs_has_inline_data(inode)) { truncate_data_blocks_range(&dn, 1); set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + stat_inc_inline_inode(inode); } sync_inode_page(&dn); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a91f45177cd9..915f9a8f3ee6 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -277,6 +277,7 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); remove_inode_page(inode); + stat_dec_inline_inode(inode); f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); From 1e1bb4baf10be371f72150e2801d97a04d40b3b9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 26 Dec 2013 12:49:48 +0900 Subject: [PATCH 74/95] f2fs: add inline_data recovery routine This patch adds a inline_data recovery routine with the following policy. [prev.] [next] of inline_data flag o o -> recover inline_data o x -> remove inline_data, and then recover data blocks x o -> remove inline_data, and then recover inline_data x x -> recover data blocks Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 2 +- fs/f2fs/inline.c | 48 +++++++++++++++++++++++++++++++++++++++++ fs/f2fs/recovery.c | 8 +++++-- include/linux/f2fs_fs.h | 3 +-- 5 files changed, 58 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0b6badadbfd8..07a7ae0d4413 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1015,6 +1015,7 @@ static inline int f2fs_readonly(struct super_block *sb) */ int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); +int truncate_blocks(struct inode *, u64); void f2fs_truncate(struct inode *); int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); @@ -1322,4 +1323,5 @@ bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_data(struct inode *, pgoff_t); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); +int recover_inline_data(struct inode *, struct page *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 68dd7bfce1a1..c77ad4d8b564 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -251,7 +251,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) f2fs_put_page(page, 1); } -static int truncate_blocks(struct inode *inode, u64 from) +int truncate_blocks(struct inode *inode, u64 from) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); unsigned int blocksize = inode->i_sb->s_blocksize; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3c9261cd215f..2a756e57aed9 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -169,3 +169,51 @@ int f2fs_write_inline_data(struct inode *inode, return 0; } + +int recover_inline_data(struct inode *inode, struct page *npage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_inode *ri = NULL; + void *src_addr, *dst_addr; + struct page *ipage; + + /* + * The inline_data recovery policy is as follows. + * [prev.] [next] of inline_data flag + * o o -> recover inline_data + * o x -> remove inline_data, and then recover data blocks + * x o -> remove inline_data, and then recover inline_data + * x x -> recover data blocks + */ + if (IS_INODE(npage)) + ri = F2FS_INODE(npage); + + if (f2fs_has_inline_data(inode) && + ri && ri->i_inline & F2FS_INLINE_DATA) { +process_inline: + ipage = get_node_page(sbi, inode->i_ino); + f2fs_bug_on(IS_ERR(ipage)); + + src_addr = inline_data_addr(npage); + dst_addr = inline_data_addr(ipage); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + update_inode(inode, ipage); + f2fs_put_page(ipage, 1); + return -1; + } + + if (f2fs_has_inline_data(inode)) { + ipage = get_node_page(sbi, inode->i_ino); + f2fs_bug_on(IS_ERR(ipage)); + zero_user_segment(ipage, INLINE_DATA_OFFSET, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + update_inode(inode, ipage); + f2fs_put_page(ipage, 1); + } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { + truncate_blocks(inode, 0); + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + goto process_inline; + } + return 0; +} diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 96e690b6f0fa..655791e518cf 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -298,6 +298,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct node_info ni; int err = 0, recovered = 0; + if (recover_inline_data(inode, page)) + goto out; + start = start_bidx_of_node(ofs_of_node(page), fi); if (IS_INODE(page)) end = start + ADDRS_PER_INODE(fi); @@ -305,12 +308,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, end = start + ADDRS_PER_BLOCK; f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { f2fs_unlock_op(sbi); - return err; + goto out; } wait_on_page_writeback(dn.node_page); @@ -361,7 +365,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, err: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - +out: f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " "recovered_data = %d blocks, err = %d", inode->i_ino, recovered, err); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index aea5eedbb1ca..da74d878dc4f 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -155,12 +155,11 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ - #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) #define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \ - - sizeof(__le32)*(DEF_ADDRS_PER_INODE + 5 - 1)) + - sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1)) struct f2fs_inode { __le16 i_mode; /* file mode */ From a8865372a8414298982e07f4ac8d6dc0ab1e0a3d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 27 Dec 2013 17:04:17 +0900 Subject: [PATCH 75/95] f2fs: handle errors correctly during f2fs_reserve_block The get_dnode_of_data nullifies inode and node page when error is occurred. There are two cases that passes inode page into get_dnode_of_data(). 1. make_empty_dir() -> get_new_data_page() -> f2fs_reserve_block(ipage) -> get_dnode_of_data() 2. f2fs_convert_inline_data() -> __f2fs_convert_inline_data() -> f2fs_reserve_block(ipage) -> get_dnode_of_data() This patch adds correct error handling codes when get_dnode_of_data() returns an error. At first, f2fs_reserve_block() calls f2fs_put_dnode() whenever reserve_new_block returns an error. So, the rule of f2fs_reserve_block() is to nullify inode page when there is any error internally. Finally, two callers of f2fs_reserve_block() should call f2fs_put_dnode() appropriately if they got an error since successful f2fs_reserve_block(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 29 ++++++++++++++++++++--------- fs/f2fs/dir.c | 7 ++++--- fs/f2fs/inline.c | 6 ++---- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index fc7a28c5ad23..63d190264a36 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -258,13 +258,16 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; + /* if inode_page exists, index should be zero */ + f2fs_bug_on(!need_put && index); + err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; + if (dn->data_blkaddr == NULL_ADDR) err = reserve_new_block(dn); - - if (need_put) + if (err || need_put) f2fs_put_dnode(dn); return err; } @@ -510,10 +513,10 @@ repeat: * * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). - * Note that, npage is set only by make_empty_dir. + * Note that, ipage is set only by make_empty_dir. */ struct page *get_new_data_page(struct inode *inode, - struct page *npage, pgoff_t index, bool new_i_size) + struct page *ipage, pgoff_t index, bool new_i_size) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; @@ -521,14 +524,16 @@ struct page *get_new_data_page(struct inode *inode, struct dnode_of_data dn; int err; - set_new_dnode(&dn, inode, npage, npage, 0); + set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, index); if (err) return ERR_PTR(err); repeat: page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); + if (!page) { + err = -ENOMEM; + goto put_err; + } if (PageUptodate(page)) return page; @@ -540,11 +545,13 @@ repeat: err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); if (err) - return ERR_PTR(err); + goto put_err; + lock_page(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); - return ERR_PTR(-EIO); + err = -EIO; + goto put_err; } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); @@ -560,6 +567,10 @@ repeat: mark_inode_dirty_sync(inode); } return page; + +put_err: + f2fs_put_dnode(&dn); + return ERR_PTR(err); } static int __allocate_data_block(struct dnode_of_data *dn) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6da77e5c753b..f815ca0c5819 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -342,11 +342,11 @@ static struct page *init_inode_metadata(struct inode *inode, err = f2fs_init_acl(inode, dir, page); if (err) - goto error; + goto put_error; err = f2fs_init_security(inode, dir, name, page); if (err) - goto error; + goto put_error; wait_on_page_writeback(page); } else { @@ -370,8 +370,9 @@ static struct page *init_inode_metadata(struct inode *inode, } return page; -error: +put_error: f2fs_put_page(page, 1); +error: remove_inode_page(inode); return ERR_PTR(err); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2a756e57aed9..688305afbc74 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -81,10 +81,9 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) * i_addr[0] is not used for inline data, * so reserving new block will not destroy inline data */ - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, 0); if (err) { - f2fs_put_page(ipage, 1); f2fs_unlock_op(sbi); return err; } @@ -111,9 +110,8 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) stat_dec_inline_inode(inode); sync_inode_page(&dn); - f2fs_put_page(ipage, 1); + f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - return err; } From a225dca39495f7c18f1210306281c9c41a3454f6 Mon Sep 17 00:00:00 2001 From: "shifei10.ge" Date: Tue, 29 Oct 2013 15:32:34 +0800 Subject: [PATCH 76/95] f2fs: fix truncate_partial_nodes bug The truncate_partial_nodes puts pages incorrectly in the following two cases. Note that the value for argc 'depth' can only be 2 or 3. Please see truncate_inode_blocks() and truncate_partial_nodes(). 1) An err is occurred in the first 'for' loop When err is occurred with depth = 2, pages[0] is invalid, so this page doesn't need to be put. There is no problem, however, when depth is 3, it doesn't put the pages correctly where pages[0] is valid and pages[1] is invalid. In this case, depth is set to 2 (ref to statemnt depth = i + 1), and then 'goto fail'. In label 'fail', for (i = depth - 3; i >= 0; i--) cannot meet the condition because i = -1, so pages[0] cann't be put. 2) An err happened in the second 'for' loop Now we've got pages[0] with depth = 2, or we've got pages[0] and pages[1] with depth = 3. When an err is detected, we need 'goto fail' to put such the pages. When depth is 2, in label 'fail', for (i = depth - 3; i >= 0; i--) cann't meet the condition because i = -1, so pages[0] cann't be put. When depth is 3, in label 'fail', for (i = depth - 3; i >= 0; i--) can only put pages[0], pages[1] also cann't be put. Note that 'depth' has been changed before first 'goto fail' (ref to statemnt depth = i + 1), so passing this modified 'depth' to the tracepoint, trace_f2fs_truncate_partial_nodes, is also incorrect. Signed-off-by: Shifei Ge [Jaegeuk Kim: modify the description and fix one bug] Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9405a17a671e..0230326be495 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -633,19 +633,19 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, return 0; /* get indirect nodes in the path */ - for (i = 0; i < depth - 1; i++) { + for (i = 0; i < idx + 1; i++) { /* refernece count'll be increased */ pages[i] = get_node_page(sbi, nid[i]); if (IS_ERR(pages[i])) { - depth = i + 1; err = PTR_ERR(pages[i]); + idx = i - 1; goto fail; } nid[i + 1] = get_nid(pages[i], offset[i + 1], false); } /* free direct nodes linked to a partial indirect node */ - for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { + for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { child_nid = get_nid(pages[idx], i, false); if (!child_nid) continue; @@ -656,7 +656,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, set_nid(pages[idx], i, 0, false); } - if (offset[depth - 1] == 0) { + if (offset[idx + 1] == 0) { dn->node_page = pages[idx]; dn->nid = nid[idx]; truncate_node(dn); @@ -664,9 +664,10 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, f2fs_put_page(pages[idx], 1); } offset[idx]++; - offset[depth - 1] = 0; + offset[idx + 1] = 0; + idx--; fail: - for (i = depth - 3; i >= 0; i--) + for (i = idx; i >= 0; i--) f2fs_put_page(pages[i], 1); trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); From 18309aaa41909cfddb93e932b16a7d14ec425c9b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Dec 2013 09:29:06 +0800 Subject: [PATCH 77/95] f2fs: avoid to left uninitialized data in page when read inline data Change log from v1: o reduce unneeded memset in __f2fs_convert_inline_data >From 58796be2bd2becbe8d52305210fb2a64e7dd80b6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Dec 2013 09:21:33 +0800 Subject: [PATCH] f2fs: avoid to left uninitialized data in page when read inline data We left uninitialized data in the tail of page when we read an inline data page. So let's initialize left part of the page excluding inline data region. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 688305afbc74..89f0c18cd73c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -43,8 +43,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) if (IS_ERR(ipage)) return PTR_ERR(ipage); - zero_user_segment(page, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ src_addr = inline_data_addr(ipage); @@ -88,7 +87,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) return err; } - zero_user_segment(page, 0, PAGE_CACHE_SIZE); + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ src_addr = inline_data_addr(ipage); From 04a17fb17fafada39f96bfb41ceb2dc1c11b2af6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Dec 2013 18:36:23 +0800 Subject: [PATCH 78/95] f2fs: avoid to read inline data except first page Here is a case which could read inline page data not from first page. 1. write inline data 2. lseek to offset 4096 3. read 4096 bytes from offset 4096 (read_inline_data read inline data page to non-first page, And previously VFS has add this page to page cache) 4. ftruncate offset 8192 5. read 4096 bytes from offset 4096 (we meet this updated page with inline data in cache) So we should leave this page with inited data and uptodate flag for this case. Change log from v1: o fix a deadlock bug Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 89f0c18cd73c..e0d800a1d79f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -39,6 +39,11 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) struct page *ipage; void *src_addr, *dst_addr; + if (page->index) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + goto out; + } + ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); @@ -52,6 +57,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) kunmap(page); f2fs_put_page(ipage, 1); +out: SetPageUptodate(page); unlock_page(page); From fb5566da9181d33ecdd9892e44f90320e7d4cc9f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 8 Jan 2014 10:09:51 +0900 Subject: [PATCH 79/95] f2fs: improve write performance under frequent fsync calls When considering a bunch of data writes with very frequent fsync calls, we are able to think the following performance regression. N: Node IO, D: Data IO, IO scheduler: cfq Issue pending IOs D1 D2 D3 D4 D1 D2 D3 D4 N1 D2 D3 D4 N1 N2 N1 D3 D4 N2 D1 --> N1 can be selected by cfq becase of the same priority of N and D. Then D3 and D4 would be delayed, resuling in performance degradation. So, when processing the fsync call, it'd better give higher priority to data IOs than node IOs by assigning WRITE and WRITE_SYNC respectively. This patch improves the random wirte performance with frequent fsync calls by up to 10%. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/file.c | 2 +- fs/f2fs/node.c | 7 ++++++- fs/f2fs/segment.c | 8 ++------ 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 07a7ae0d4413..b69f190fb195 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1118,8 +1118,8 @@ int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, - block_t, block_t *); +void write_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_io_info *, unsigned int, block_t, block_t *); void write_data_page(struct page *, struct dnode_of_data *, block_t *, struct f2fs_io_info *); void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c77ad4d8b564..14511b00fffa 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -115,7 +115,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0; bool need_cp = false; struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, + .sync_mode = WB_SYNC_NONE, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0230326be495..b8c9301db52c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1194,6 +1194,10 @@ static int f2fs_write_node_page(struct page *page, nid_t nid; block_t new_addr; struct node_info ni; + struct f2fs_io_info fio = { + .type = NODE, + .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC: WRITE, + }; if (unlikely(sbi->por_doing)) goto redirty_out; @@ -1218,7 +1222,7 @@ static int f2fs_write_node_page(struct page *page, mutex_lock(&sbi->node_write); set_page_writeback(page); - write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); + write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr); dec_page_count(sbi, F2FS_DIRTY_NODES); mutex_unlock(&sbi->node_write); @@ -1253,6 +1257,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, /* if mounting is failed, skip writing node pages */ wbc->nr_to_write = 3 * max_hw_blocks(sbi); + wbc->sync_mode = WB_SYNC_NONE; sync_node_pages(sbi, 0, wbc); wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - wbc->nr_to_write); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 555ae7693ea0..5f84639354e3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -924,16 +924,12 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) } void write_node_page(struct f2fs_sb_info *sbi, struct page *page, + struct f2fs_io_info *fio, unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) { struct f2fs_summary sum; - struct f2fs_io_info fio = { - .type = NODE, - .rw = WRITE_SYNC, - }; - set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, &fio); + do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio); } void write_data_page(struct page *page, struct dnode_of_data *dn, From b1c57c1caa753cec299e62bb4272da0e85a01ef0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 8 Jan 2014 13:45:08 +0900 Subject: [PATCH 80/95] f2fs: add a sysfs entry to control max_victim_search Previously during SSR and GC, the maximum number of retrials to find a victim segment was hard-coded by MAX_VICTIM_SEARCH, 4096 by default. This number makes an effect on IO locality, when SSR mode is activated, which results in performance fluctuation on some low-end devices. If max_victim_search = 4, the victim will be searched like below. ("D" represents a dirty segment, and "*" indicates a selected victim segment.) D1 D2 D3 D4 D5 D6 D7 D8 D9 [ * ] [ * ] [ * ] [ ....] This patch adds a sysfs entry to control the number dynamically through: /sys/fs/f2fs/$dev/max_victim_search Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 4 ++-- fs/f2fs/gc.h | 2 +- fs/f2fs/super.c | 6 ++++++ 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b69f190fb195..5f7dc4f6eb09 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -451,6 +451,9 @@ struct f2fs_sb_info { struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ + /* maximum # of trials to find a victim segment for SSR and GC */ + unsigned int max_victim_search; + /* * for stat information. * one is for the LFS mode, and the other is for the SSR mode. diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 599f546d042c..9117ccaf254a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -163,8 +163,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - if (p->max_search > MAX_VICTIM_SEARCH) - p->max_search = MAX_VICTIM_SEARCH; + if (p->max_search > sbi->max_victim_search) + p->max_search = sbi->max_victim_search; p->offset = sbi->last_victim[p->gc_mode]; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 507056d22205..5d5eb6047bf4 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -20,7 +20,7 @@ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ /* Search max. number of dirty segments to select a victim segment */ -#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */ +#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f16da92a7899..b070f3010ce9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -74,6 +74,7 @@ static match_table_t f2fs_tokens = { enum { GC_THREAD, /* struct f2fs_gc_thread */ SM_INFO, /* struct f2fs_sm_info */ + F2FS_SBI, /* struct f2fs_sb_info */ }; struct f2fs_attr { @@ -91,6 +92,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return (unsigned char *)sbi->gc_thread; else if (struct_type == SM_INFO) return (unsigned char *)SM_I(sbi); + else if (struct_type == F2FS_SBI) + return (unsigned char *)sbi; return NULL; } @@ -180,6 +183,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -191,6 +195,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(max_small_discards), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), + ATTR_LIST(max_victim_search), NULL, }; @@ -775,6 +780,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); From 3bac380c901206bccba1f5b108c1c8382ab21a16 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Jan 2014 21:00:06 +0900 Subject: [PATCH 81/95] f2fs: update documents and a MAINTAINERS entry This patch adds missing some description of sysfs entries in - Documentation/ABI/testing/sysfs-fs-f2fs - Documentation/filesystems/f2fs.txt. And it adds a maintained document entry of F2FS in MAINTAINERS. Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 31 +++++++++++++++++++++++++ Documentation/filesystems/f2fs.txt | 5 ++++ MAINTAINERS | 1 + 3 files changed, 37 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 31942efcaf0e..32b0809203dd 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -24,3 +24,34 @@ Date: July 2013 Contact: "Namjae Jeon" Description: Controls the victim selection policy for garbage collection. + +What: /sys/fs/f2fs//reclaim_segments +Date: October 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the issue rate of segment discard commands. + +What: /sys/fs/f2fs//ipu_policy +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the in-place-update policy. + +What: /sys/fs/f2fs//min_ipu_util +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the FS utilization condition for the in-place-update + policies. + +What: /sys/fs/f2fs//max_small_discards +Date: November 2013 +Contact: "Jaegeuk Kim" +Description: + Controls the issue rate of small discard commands. + +What: /sys/fs/f2fs//max_victim_search +Date: January 2014 +Contact: "Jaegeuk Kim" +Description: + Controls the number of trials to find a victim segment. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 998e698cf455..b8d284975f0f 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -190,6 +190,11 @@ Files in /sys/fs/f2fs/ of the filesystem utilization, and used by F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. + max_victim_search This parameter controls the number of trials to + find a victim segment when conducting SSR and + cleaning operations. The default value is 4096 + which covers 8GB block address range. + ================================================================================ USAGE ================================================================================ diff --git a/MAINTAINERS b/MAINTAINERS index d5e4ff328cc7..bd58e8e81649 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3629,6 +3629,7 @@ W: http://en.wikipedia.org/wiki/F2FS T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git S: Maintained F: Documentation/filesystems/f2fs.txt +F: Documentation/ABI/testing/sysfs-fs-f2fs F: fs/f2fs/ F: include/linux/f2fs_fs.h From 5514f0aadddcdfaaaea697b60203f5402552eb7b Mon Sep 17 00:00:00 2001 From: Yuan Zhong Date: Fri, 10 Jan 2014 07:26:14 +0000 Subject: [PATCH 82/95] f2fs: remove the needless parameter of f2fs_wait_on_page_writeback "boo sync" parameter is never referenced in f2fs_wait_on_page_writeback. We should remove this parameter. Signed-off-by: Yuan Zhong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 4 ++-- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/segment.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 63d190264a36..19ad06694478 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -226,7 +226,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) struct page *node_page = dn->node_page; unsigned int ofs_in_node = dn->ofs_in_node; - f2fs_wait_on_page_writeback(node_page, NODE, false); + f2fs_wait_on_page_writeback(node_page, NODE); rn = F2FS_NODE(node_page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5f7dc4f6eb09..8466b5ea76f7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1132,7 +1132,7 @@ void rewrite_node_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); -void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); +void f2fs_wait_on_page_writeback(struct page *, enum page_type); void write_data_summaries(struct f2fs_sb_info *, block_t); void write_node_summaries(struct f2fs_sb_info *, block_t); int lookup_journal_in_cursum(struct f2fs_summary_block *, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9117ccaf254a..ea0371e854b4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -428,7 +428,7 @@ next_step: /* set page dirty and write it */ if (gc_type == FG_GC) { - f2fs_wait_on_page_writeback(node_page, NODE, true); + f2fs_wait_on_page_writeback(node_page, NODE); set_page_dirty(node_page); } else { if (!PageWriteback(node_page)) @@ -533,7 +533,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) } else { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA); if (clear_page_dirty_for_io(page) && S_ISDIR(inode->i_mode)) { diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e0d800a1d79f..31ee5b164ff9 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -106,7 +106,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) set_page_writeback(page); write_data_page(page, &dn, &new_blk_addr, &fio); update_extent_cache(new_blk_addr, &dn); - f2fs_wait_on_page_writeback(page, DATA, true); + f2fs_wait_on_page_writeback(page, DATA); /* clear inline data and flag after data writeback */ zero_user_segment(ipage, INLINE_DATA_OFFSET, diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 915f9a8f3ee6..ffa4c6d0fab4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -175,7 +175,7 @@ void update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; - f2fs_wait_on_page_writeback(node_page, NODE, false); + f2fs_wait_on_page_writeback(node_page, NODE); ri = F2FS_INODE(node_page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5f84639354e3..a934e6f2738b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1053,7 +1053,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, } void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool sync) + enum page_type type) { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); if (PageWriteback(page)) { From 4531929e3922f2cdd34208d7dc1404ac06e6ced5 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 10 Jan 2014 18:09:02 +0800 Subject: [PATCH 83/95] f2fs: move grabing orphan pages out of protection region Move grabing orphan block page out of protection region, and grab all the orphan block pages ahead. Signed-off-by: Gu Zheng Reviewed-by: Chao Yu [Jaegeuk Kim: remove unnecessary code pointed by Chao Yu] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0d78bbe79f98..fdc5a12c0edd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -303,22 +303,25 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) { struct list_head *head; struct f2fs_orphan_block *orphan_blk = NULL; - struct page *page = NULL; unsigned int nentries = 0; - unsigned short index = 1; - unsigned short orphan_blocks; + unsigned short index; + unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + + (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + struct page *page = NULL; + struct page *pages[orphan_blocks]; struct orphan_inode_entry *orphan = NULL; - orphan_blocks = (unsigned short)((sbi->n_orphans + - (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + for (index = 0; index < orphan_blocks; index++) + pages[index] = grab_meta_page(sbi, start_blk + index); + index = 1; mutex_lock(&sbi->orphan_inode_mutex); head = &sbi->orphan_inode_list; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { if (!page) { - page = grab_meta_page(sbi, start_blk); + page = pages[index - 1]; orphan_blk = (struct f2fs_orphan_block *)page_address(page); memset(orphan_blk, 0, sizeof(*orphan_blk)); @@ -338,7 +341,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) set_page_dirty(page); f2fs_put_page(page, 1); index++; - start_blk++; nentries = 0; page = NULL; } From c1ef37257229dc8903615eaf1d1abaa5da3f0686 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 10 Jan 2014 18:09:08 +0800 Subject: [PATCH 84/95] f2fs: move alloc new orphan node out of lock protection region Move alloc new orphan node out of lock protection region. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fdc5a12c0edd..681782c715fb 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -219,26 +219,29 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) struct list_head *head, *this; struct orphan_inode_entry *new = NULL, *orphan = NULL; + new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + new->ino = ino; + mutex_lock(&sbi->orphan_inode_mutex); head = &sbi->orphan_inode_list; list_for_each(this, head) { orphan = list_entry(this, struct orphan_inode_entry, list); - if (orphan->ino == ino) - goto out; + if (orphan->ino == ino) { + mutex_unlock(&sbi->orphan_inode_mutex); + kmem_cache_free(orphan_entry_slab, new); + return; + } + if (orphan->ino > ino) break; orphan = NULL; } - new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); - new->ino = ino; - /* add new_oentry into list which is sorted by inode number */ if (orphan) list_add(&new->list, this->prev); else list_add_tail(&new->list, head); -out: mutex_unlock(&sbi->orphan_inode_mutex); } From 17b692f60e93f5d417fcdbfd681b0f20f9f31ec8 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 10 Jan 2014 18:09:14 +0800 Subject: [PATCH 85/95] f2fs: use spinlock rather than mutex for better speed With the 2 previous changes, all the long time operations are moved out of the protection region, so here we can use spinlock rather than mutex (orphan_inode_mutex) for lower overhead. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 24 ++++++++++++------------ fs/f2fs/f2fs.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 681782c715fb..4de0345f73f3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -196,22 +196,22 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) { int err = 0; - mutex_lock(&sbi->orphan_inode_mutex); + spin_lock(&sbi->orphan_inode_lock); if (unlikely(sbi->n_orphans >= sbi->max_orphans)) err = -ENOSPC; else sbi->n_orphans++; - mutex_unlock(&sbi->orphan_inode_mutex); + spin_unlock(&sbi->orphan_inode_lock); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - mutex_lock(&sbi->orphan_inode_mutex); + spin_lock(&sbi->orphan_inode_lock); f2fs_bug_on(sbi->n_orphans == 0); sbi->n_orphans--; - mutex_unlock(&sbi->orphan_inode_mutex); + spin_unlock(&sbi->orphan_inode_lock); } void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -222,12 +222,12 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); new->ino = ino; - mutex_lock(&sbi->orphan_inode_mutex); + spin_lock(&sbi->orphan_inode_lock); head = &sbi->orphan_inode_list; list_for_each(this, head) { orphan = list_entry(this, struct orphan_inode_entry, list); if (orphan->ino == ino) { - mutex_unlock(&sbi->orphan_inode_mutex); + spin_unlock(&sbi->orphan_inode_lock); kmem_cache_free(orphan_entry_slab, new); return; } @@ -242,7 +242,7 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) list_add(&new->list, this->prev); else list_add_tail(&new->list, head); - mutex_unlock(&sbi->orphan_inode_mutex); + spin_unlock(&sbi->orphan_inode_lock); } void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -250,7 +250,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) struct list_head *head; struct orphan_inode_entry *orphan; - mutex_lock(&sbi->orphan_inode_mutex); + spin_lock(&sbi->orphan_inode_lock); head = &sbi->orphan_inode_list; list_for_each_entry(orphan, head, list) { if (orphan->ino == ino) { @@ -261,7 +261,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) break; } } - mutex_unlock(&sbi->orphan_inode_mutex); + spin_unlock(&sbi->orphan_inode_lock); } static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -318,7 +318,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) pages[index] = grab_meta_page(sbi, start_blk + index); index = 1; - mutex_lock(&sbi->orphan_inode_mutex); + spin_lock(&sbi->orphan_inode_lock); head = &sbi->orphan_inode_list; /* loop for each orphan inode entry and write them in Jornal block */ @@ -357,7 +357,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) f2fs_put_page(page, 1); } - mutex_unlock(&sbi->orphan_inode_mutex); + spin_unlock(&sbi->orphan_inode_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -828,7 +828,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) void init_orphan_info(struct f2fs_sb_info *sbi) { - mutex_init(&sbi->orphan_inode_mutex); + spin_lock_init(&sbi->orphan_inode_lock); INIT_LIST_HEAD(&sbi->orphan_inode_list); sbi->n_orphans = 0; /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8466b5ea76f7..ee304fb71e1f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -412,7 +412,7 @@ struct f2fs_sb_info { /* for orphan inode management */ struct list_head orphan_inode_list; /* orphan inode list */ - struct mutex orphan_inode_mutex; /* for orphan inode list */ + spinlock_t orphan_inode_lock; /* for orphan inode list */ unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ From 499046ab2c256c4b239cf389ff48e0c7478f3061 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 13 Jan 2014 10:34:18 +0900 Subject: [PATCH 86/95] f2fs: add delimiter to seperate name and value in debug phrase Support for f2fs-tools/tools/f2stat to monitor /sys/kernel/debug/f2fs/status Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 711a0d4d1cba..6e4ac9a25dfd 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -246,13 +246,13 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_printf(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - nodes %4d in %4d\n", + seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); - seq_printf(s, " - dents %4d in dirs:%4d\n", + seq_printf(s, " - dents: %4d in dirs:%4d\n", si->ndirty_dent, si->ndirty_dirs); - seq_printf(s, " - meta %4d in %4d\n", + seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); - seq_printf(s, " - NATs %5d > %lu\n", + seq_printf(s, " - NATs: %5d > %lu\n", si->nats, NM_WOUT_THRESHOLD); seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", si->sits, si->fnids); From c33ec32692e1f2f4650f7bf5bb1108bb346b82a4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Jan 2014 16:20:40 +0900 Subject: [PATCH 87/95] f2fs: avoid f2fs_balance_fs call during pageout This patch should resolve the following bug. ========================================================= [ INFO: possible irq lock inversion dependency detected ] 3.13.0-rc5.f2fs+ #6 Not tainted --------------------------------------------------------- kswapd0/41 just changed the state of lock: (&sbi->gc_mutex){+.+.-.}, at: [] f2fs_balance_fs+0xae/0xd0 [f2fs] but this lock took another, RECLAIM_FS-READ-unsafe lock in the past: (&sbi->cp_rwsem){++++.?} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Chain exists of: &sbi->gc_mutex --> &sbi->cp_mutex --> &sbi->cp_rwsem Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sbi->cp_rwsem); local_irq_disable(); lock(&sbi->gc_mutex); lock(&sbi->cp_mutex); lock(&sbi->gc_mutex); *** DEADLOCK *** This bug is due to the f2fs_balance_fs call in f2fs_write_data_page. If f2fs_write_data_page is triggered by wbc->for_reclaim via kswapd, it should not call f2fs_balance_fs which tries to get a mutex grabbed by original syscall flow. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 19ad06694478..e57bde02e37f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -842,8 +842,10 @@ write: else if (err) goto redirty_out; - if (wbc->for_reclaim) + if (wbc->for_reclaim) { f2fs_submit_merged_bio(sbi, DATA, WRITE); + need_balance_fs = false; + } clear_cold_data(page); out: From c434cbc0edda6a7a59d22b9d5d4279989d0ab804 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Thu, 16 Jan 2014 11:58:54 +0900 Subject: [PATCH 88/95] f2fs: missing REQ_META and REQ_PRIO when sync_meta_pages(META_FLUSH) Doing sync_meta_pages with META_FLUSH when checkpoint, we overide rw using WRITE_FLUSH_FUA. At this time, we also should set REQ_META|REQ_PRIO. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e57bde02e37f..bda889e2ca63 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -144,7 +144,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; - io->fio.rw = WRITE_FLUSH_FUA; + io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; } __submit_merged_bio(io); mutex_unlock(&io->io_mutex); From 6c311ec6c2d9e015d454b4e3fda8008b5bebf316 Mon Sep 17 00:00:00 2001 From: Chris Fries Date: Fri, 17 Jan 2014 14:44:39 -0600 Subject: [PATCH 89/95] f2fs: clean checkpatch warnings Fixed a variety of trivial checkpatch warnings. The only delta should be some minor formatting on log strings that were split / too long. Signed-off-by: Chris Fries Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/debug.c | 2 +- fs/f2fs/dir.c | 5 +++-- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/file.c | 2 +- fs/f2fs/inode.c | 12 ++++++++---- fs/f2fs/node.c | 2 +- fs/f2fs/recovery.c | 12 ++++++------ fs/f2fs/segment.c | 6 ++++-- fs/f2fs/segment.h | 13 +++++++------ fs/f2fs/super.c | 8 +++++--- 11 files changed, 40 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bda889e2ca63..f5fac16bc6e7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -792,7 +792,7 @@ static int f2fs_write_data_page(struct page *page, int err = 0; struct f2fs_io_info fio = { .type = DATA, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC: WRITE, + .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; if (page->index < end_index) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 6e4ac9a25dfd..63cb7e215e00 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -245,7 +245,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - node blocks : %d\n", si->node_blks); seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); - seq_printf(s, "\nBalancing F2FS Async:\n"); + seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f815ca0c5819..cd055b6fcffe 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -430,7 +430,8 @@ next: * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). */ -int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) +int __f2fs_add_link(struct inode *dir, const struct qstr *name, + struct inode *inode) { unsigned int bit_pos; unsigned int level; @@ -631,7 +632,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); - for ( ; n < npages; n++) { + for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n); if (IS_ERR(dentry_page)) continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ee304fb71e1f..5ab3981938d4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -607,9 +607,9 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) static inline int F2FS_HAS_BLOCKS(struct inode *inode) { if (F2FS_I(inode)->i_xattr_nid) - return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); + return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1; else - return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); + return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; } static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, @@ -1231,7 +1231,7 @@ struct f2fs_stat_info { static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) { - return (struct f2fs_stat_info*)sbi->stat_info; + return (struct f2fs_stat_info *)sbi->stat_info; } #define stat_inc_call_count(si) ((si)->call_count++) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 14511b00fffa..85e91ca88d57 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -202,7 +202,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + ofs; - for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { + for (; count > 0; count--, addr++, dn->ofs_in_node++) { block_t blkaddr = le32_to_cpu(*addr); if (blkaddr == NULL_ADDR) continue; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ffa4c6d0fab4..4d67ed736dca 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -42,9 +42,11 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { if (ri->i_addr[0]) - inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); + inode->i_rdev = + old_decode_dev(le32_to_cpu(ri->i_addr[0])); else - inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); + inode->i_rdev = + new_decode_dev(le32_to_cpu(ri->i_addr[1])); } } @@ -52,11 +54,13 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) { if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); + ri->i_addr[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); ri->i_addr[1] = 0; } else { ri->i_addr[0] = 0; - ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); + ri->i_addr[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); ri->i_addr[2] = 0; } } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b8c9301db52c..226a05a27e33 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1196,7 +1196,7 @@ static int f2fs_write_node_page(struct page *page, struct node_info ni; struct f2fs_io_info fio = { .type = NODE, - .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC: WRITE, + .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; if (unlikely(sbi->por_doing)) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 655791e518cf..976a7a934db5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -95,9 +95,9 @@ out_unmap_put: kunmap(page); f2fs_put_page(page, 0); out: - f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " - "ino = %x, name = %s, dir = %lx, err = %d", - ino_of_node(ipage), raw_inode->i_name, + f2fs_msg(inode->i_sb, KERN_NOTICE, + "%s: ino = %x, name = %s, dir = %lx, err = %d", + __func__, ino_of_node(ipage), raw_inode->i_name, IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -366,9 +366,9 @@ err: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); out: - f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " - "recovered_data = %d blocks, err = %d", - inode->i_ino, recovered, err); + f2fs_msg(sbi->sb, KERN_NOTICE, + "recover_data: ino = %lx, recovered = %d blocks, err = %d", + inode->i_ino, recovered, err); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a934e6f2738b..e82423fbcb9d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -946,7 +946,8 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); } -void rewrite_data_page(struct page *page, block_t old_blkaddr, struct f2fs_io_info *fio) +void rewrite_data_page(struct page *page, block_t old_blkaddr, + struct f2fs_io_info *fio) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -1647,7 +1648,8 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) mutex_lock(&curseg->curseg_mutex); for (i = 0; i < sits_in_cursum(sum); i++) { - if (le32_to_cpu(segno_in_journal(sum, i)) == start) { + if (le32_to_cpu(segno_in_journal(sum, i)) + == start) { sit = sit_in_journal(sum, i); mutex_unlock(&curseg->curseg_mutex); goto got_it; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e9a10bdd7fed..5731682d7516 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -448,8 +448,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) static inline bool need_SSR(struct f2fs_sb_info *sbi) { - return ((prefree_segments(sbi) / sbi->segs_per_sec) - + free_sections(sbi) < overprovision_sections(sbi)); + return (prefree_segments(sbi) / sbi->segs_per_sec) + + free_sections(sbi) < overprovision_sections(sbi); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) @@ -460,18 +460,19 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) if (unlikely(sbi->por_doing)) return false; - return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + - reserved_sections(sbi))); + return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi)); } static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) { - return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments); + return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; } static inline int utilization(struct f2fs_sb_info *sbi) { - return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); + return div_u64((u64)valid_user_blocks(sbi) * 100, + sbi->user_block_count); } /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b070f3010ce9..1a85f83abd53 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -535,7 +535,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; struct f2fs_sb_info *sbi = F2FS_SB(sb); - unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); int i; for (i = 0; i < total_segs; i++) { @@ -816,8 +817,9 @@ retry: /* sanity checking of raw super */ if (sanity_check_raw_super(sb, *raw_super)) { brelse(*raw_super_buf); - f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " - "in %dth superblock", block + 1); + f2fs_msg(sb, KERN_ERR, + "Can't find valid F2FS filesystem in %dth superblock", + block + 1); if (block == 0) { block++; goto retry; From a18ff063406dd6aec41fda598eabe2691007a30d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Jan 2014 13:32:12 +0900 Subject: [PATCH 90/95] f2fs: call mark_inode_dirty to flush dirty pages If a dentry page is updated, we should call mark_inode_dirty to add the inode into the dirty list, so that its dentry pages are flushed to the disk. Otherwise, the inode can be evicted without flush. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- fs/f2fs/dir.c | 6 ++---- fs/f2fs/namei.c | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f5fac16bc6e7..0ae558723506 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -249,6 +249,7 @@ int reserve_new_block(struct dnode_of_data *dn) __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; } @@ -564,7 +565,6 @@ repeat: i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); /* Only the directory inode sets new_i_size */ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); - mark_inode_dirty_sync(inode); } return page; @@ -1060,6 +1060,8 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); SetPageUptodate(page); + mark_inode_dirty(inode); + if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); set_dirty_dir_page(inode, page); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cd055b6fcffe..b2b77ccea82b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -388,6 +388,8 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; + mark_inode_dirty(dir); + if (F2FS_I(dir)->i_current_depth != current_depth) { F2FS_I(dir)->i_current_depth = current_depth; set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); @@ -395,8 +397,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) update_inode_page(dir); - else - mark_inode_dirty(dir); if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -553,8 +553,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (inode && S_ISDIR(inode->i_mode)) { drop_nlink(dir); update_inode_page(dir); - } else { - mark_inode_dirty(dir); } if (inode) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a68838dae335..3d32f2969c5e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -430,6 +430,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir_entry) drop_nlink(new_inode); drop_nlink(new_inode); + mark_inode_dirty(new_inode); if (!new_inode->i_nlink) add_orphan_inode(sbi, new_inode->i_ino); @@ -459,11 +460,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); F2FS_I(old_inode)->i_pino = new_dir->i_ino; + update_inode_page(old_inode); } else { kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); + mark_inode_dirty(old_dir); update_inode_page(old_dir); } From e8dae6045882e32a067326f47b7ccd3aaf8814e2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Jan 2014 18:31:38 +0900 Subject: [PATCH 91/95] f2fs: move a branch for code redability This patch moves a function in f2fs_delete_entry for code readability. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b2b77ccea82b..2b7c255bcbdf 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -550,12 +550,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; - if (inode && S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } - if (inode) { + if (S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + update_inode_page(dir); + } inode->i_ctime = CURRENT_TIME; drop_nlink(inode); if (S_ISDIR(inode->i_mode)) { From 9df27d982d58b9372bc476fb6b9bab861d617029 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Jan 2014 18:37:04 +0800 Subject: [PATCH 92/95] f2fs: add help function META_MAPPING Introduce help function META_MAPPING() to get the cache meta blocks' address space. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++---- fs/f2fs/debug.c | 4 ++-- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4de0345f73f3..f9d4f7de75ad 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -30,7 +30,7 @@ static struct kmem_cache *inode_entry_slab; */ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { - struct address_space *mapping = sbi->meta_inode->i_mapping; + struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; repeat: page = grab_cache_page(mapping, index); @@ -50,7 +50,7 @@ repeat: */ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { - struct address_space *mapping = sbi->meta_inode->i_mapping; + struct address_space *mapping = META_MAPPING(sbi); struct page *page; repeat: page = grab_cache_page(mapping, index); @@ -128,7 +128,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write) { - struct address_space *mapping = sbi->meta_inode->i_mapping; + struct address_space *mapping = META_MAPPING(sbi); pgoff_t index = 0, end = LONG_MAX; struct pagevec pvec; long nwritten = 0; @@ -771,7 +771,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) wait_on_all_pages_writeback(sbi); filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); - filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); + filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); /* update user_block_counts */ sbi->last_valid_block_count = sbi->total_valid_block_count; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 63cb7e215e00..8bdc365be9e3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -53,7 +53,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->prefree_count = prefree_segments(sbi); si->dirty_count = dirty_segments(sbi); si->node_pages = sbi->node_inode->i_mapping->nrpages; - si->meta_pages = sbi->meta_inode->i_mapping->nrpages; + si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; si->sits = SIT_I(sbi)->dirty_sentries; si->fnids = NM_I(sbi)->fcnt; @@ -168,7 +168,7 @@ get_cache: si->cache_mem += NM_I(sbi)->nat_cnt; npages = sbi->node_inode->i_mapping->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - npages = sbi->meta_inode->i_mapping->nrpages; + npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5ab3981938d4..117e30f6b88c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -533,6 +533,11 @@ static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); } +static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi) +{ + return sbi->meta_inode->i_mapping; +} + static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) { sbi->s_dirty = 1; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 226a05a27e33..527bd12d8ae1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -87,7 +87,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) */ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) { - struct address_space *mapping = sbi->meta_inode->i_mapping; + struct address_space *mapping = META_MAPPING(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); struct page *page; pgoff_t index; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e82423fbcb9d..7caac5f2ca9e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1585,7 +1585,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages) { - struct address_space *mapping = sbi->meta_inode->i_mapping; + struct address_space *mapping = META_MAPPING(sbi); struct page *page; block_t blk_addr, prev_blk_addr = 0; int sit_blk_cnt = SIT_BLK_CNT(sbi); From 63f5384c9a7df95a0e0eb6745f3038c703bdf4c3 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Jan 2014 18:37:30 +0800 Subject: [PATCH 93/95] f2fs: remove the orphan block page array As the orphan_blocks may be max to 504, so it is not security and rigorous to store such a large array in the kernel stack as Dan Carpenter said. In fact, grab_meta_page has locked the page in the page cache, and we can use find_get_page() to fetch the page safely in the downstream, so we can remove the page array directly. Reported-by: Dan Carpenter Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f9d4f7de75ad..ed82de6bfb47 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -311,11 +311,10 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); struct page *page = NULL; - struct page *pages[orphan_blocks]; struct orphan_inode_entry *orphan = NULL; for (index = 0; index < orphan_blocks; index++) - pages[index] = grab_meta_page(sbi, start_blk + index); + grab_meta_page(sbi, start_blk + index); index = 1; spin_lock(&sbi->orphan_inode_lock); @@ -324,10 +323,12 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { if (!page) { - page = pages[index - 1]; + page = find_get_page(META_MAPPING(sbi), start_blk++); + f2fs_bug_on(!page); orphan_blk = (struct f2fs_orphan_block *)page_address(page); memset(orphan_blk, 0, sizeof(*orphan_blk)); + f2fs_put_page(page, 0); } orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); From 4ef51a8fcc7c54ca3ad948a8b4310b3bd5490c72 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Jan 2014 18:51:16 +0900 Subject: [PATCH 94/95] f2fs: introduce NODE_MAPPING for code consistency This patch adds NODE_MAPPING which is similar as META_MAPPING introduced by Gu Zheng. Cc: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/debug.c | 4 ++-- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/node.c | 50 +++++++++++++++++++------------------------- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ed82de6bfb47..293d0486a40f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -771,7 +771,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* wait for previous submitted node/meta pages writeback */ wait_on_all_pages_writeback(sbi); - filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); + filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); /* update user_block_counts */ diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8bdc365be9e3..3de9d20d0c14 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -52,7 +52,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_secs = free_sections(sbi); si->prefree_count = prefree_segments(sbi); si->dirty_count = dirty_segments(sbi); - si->node_pages = sbi->node_inode->i_mapping->nrpages; + si->node_pages = NODE_MAPPING(sbi)->nrpages; si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; si->sits = SIT_I(sbi)->dirty_sentries; @@ -166,7 +166,7 @@ get_cache: /* free nids */ si->cache_mem = NM_I(sbi)->fcnt; si->cache_mem += NM_I(sbi)->nat_cnt; - npages = sbi->node_inode->i_mapping->nrpages; + npages = NODE_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 117e30f6b88c..af51a0bd2dee 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -538,6 +538,11 @@ static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi) return sbi->meta_inode->i_mapping; } +static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) +{ + return sbi->node_inode->i_mapping; +} + static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) { sbi->s_dirty = 1; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 527bd12d8ae1..bbfc655493dc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -681,7 +681,6 @@ fail: int truncate_inode_blocks(struct inode *inode, pgoff_t from) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct address_space *node_mapping = sbi->node_inode->i_mapping; int err = 0, cont = 1; int level, offset[4], noffset[4]; unsigned int nofs = 0; @@ -756,7 +755,7 @@ skip_partial: if (offset[1] == 0 && ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); - if (unlikely(page->mapping != node_mapping)) { + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto restart; } @@ -842,7 +841,6 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); - struct address_space *mapping = sbi->node_inode->i_mapping; struct node_info old_ni, new_ni; struct page *page; int err; @@ -850,7 +848,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = grab_cache_page(mapping, dn->nid); + page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); if (!page) return ERR_PTR(-ENOMEM); @@ -920,18 +918,17 @@ static int read_node_page(struct page *page, int rw) */ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { - struct address_space *mapping = sbi->node_inode->i_mapping; struct page *apage; int err; - apage = find_get_page(mapping, nid); + apage = find_get_page(NODE_MAPPING(sbi), nid); if (apage && PageUptodate(apage)) { f2fs_put_page(apage, 0); return; } f2fs_put_page(apage, 0); - apage = grab_cache_page(mapping, nid); + apage = grab_cache_page(NODE_MAPPING(sbi), nid); if (!apage) return; @@ -944,11 +941,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { - struct address_space *mapping = sbi->node_inode->i_mapping; struct page *page; int err; repeat: - page = grab_cache_page(mapping, nid); + page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) return ERR_PTR(-ENOMEM); @@ -963,7 +959,7 @@ repeat: f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - if (unlikely(page->mapping != mapping)) { + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } @@ -980,7 +976,6 @@ got_it: struct page *get_node_page_ra(struct page *parent, int start) { struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); - struct address_space *mapping = sbi->node_inode->i_mapping; struct blk_plug plug; struct page *page; int err, i, end; @@ -991,7 +986,7 @@ struct page *get_node_page_ra(struct page *parent, int start) if (!nid) return ERR_PTR(-ENOENT); repeat: - page = grab_cache_page(mapping, nid); + page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) return ERR_PTR(-ENOMEM); @@ -1016,7 +1011,7 @@ repeat: blk_finish_plug(&plug); lock_page(page); - if (unlikely(page->mapping != mapping)) { + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } @@ -1047,7 +1042,6 @@ void sync_inode_page(struct dnode_of_data *dn) int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, struct writeback_control *wbc) { - struct address_space *mapping = sbi->node_inode->i_mapping; pgoff_t index, end; struct pagevec pvec; int step = ino ? 2 : 0; @@ -1061,7 +1055,7 @@ next_step: while (index <= end) { int i, nr_pages; - nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); if (nr_pages == 0) @@ -1094,7 +1088,7 @@ next_step: else if (!trylock_page(page)) continue; - if (unlikely(page->mapping != mapping)) { + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { continue_unlock: unlock_page(page); continue; @@ -1121,7 +1115,7 @@ continue_unlock: set_fsync_mark(page, 0); set_dentry_mark(page, 0); } - mapping->a_ops->writepage(page, wbc); + NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); wrote++; if (--wbc->nr_to_write == 0) @@ -1148,18 +1142,19 @@ continue_unlock: int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) { - struct address_space *mapping = sbi->node_inode->i_mapping; pgoff_t index = 0, end = LONG_MAX; struct pagevec pvec; - int nr_pages; int ret2 = 0, ret = 0; pagevec_init(&pvec, 0); - while ((index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_WRITEBACK, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { - unsigned i; + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, + PAGECACHE_TAG_WRITEBACK, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1178,9 +1173,9 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) cond_resched(); } - if (unlikely(test_and_clear_bit(AS_ENOSPC, &mapping->flags))) + if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags))) ret2 = -ENOSPC; - if (unlikely(test_and_clear_bit(AS_EIO, &mapping->flags))) + if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags))) ret2 = -EIO; if (!ret) ret = ret2; @@ -1538,13 +1533,12 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) { - struct address_space *mapping = sbi->node_inode->i_mapping; struct f2fs_inode *src, *dst; nid_t ino = ino_of_node(page); struct node_info old_ni, new_ni; struct page *ipage; - ipage = grab_cache_page(mapping, ino); + ipage = grab_cache_page(NODE_MAPPING(sbi), ino); if (!ipage) return -ENOMEM; From bf39c00a9a7f3cdb5ce7d6695d9f044daf8f0b53 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Jan 2014 20:41:57 +0900 Subject: [PATCH 95/95] f2fs: drop obsolete node page when it is truncated If a node page is trucated, we'd better drop the page in the node_inode's page cache for better memory footprint. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bbfc655493dc..b0649b76eb4f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -518,6 +518,10 @@ invalidate: F2FS_SET_SB_DIRT(sbi); f2fs_put_page(dn->node_page, 1); + + invalidate_mapping_pages(NODE_MAPPING(sbi), + dn->node_page->index, dn->node_page->index); + dn->node_page = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); }