From 99bbe307013059951368d12f5454d34568de4850 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Jun 2020 09:56:51 -0700 Subject: [PATCH 01/52] f2fs: avoid checkpatch error ERROR:INITIALISED_STATIC: do not initialise statics to NULL Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 1e02a8c106b0..4dca9e456734 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -506,7 +506,7 @@ bool f2fs_is_compress_backend_ready(struct inode *inode) return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; } -static mempool_t *compress_page_pool = NULL; +static mempool_t *compress_page_pool; static int num_compress_pages = 512; module_param(num_compress_pages, uint, 0444); MODULE_PARM_DESC(num_compress_pages, From 742532d11d8328231d099f557e6287e75b2d247b Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 10 Jun 2020 01:14:46 +0300 Subject: [PATCH 02/52] f2fs: use kfree() instead of kvfree() to free superblock data Use kfree() instead of kvfree() to free super in read_raw_super_block() because the memory is allocated with kzalloc() in the function. Use kfree() instead of kvfree() to free sbi, raw_super in f2fs_fill_super() and f2fs_put_super() because the memory is allocated with kzalloc(). Signed-off-by: Denis Efremov Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 20e56b0fa46a..426cebdd84e5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1243,7 +1243,7 @@ static void f2fs_put_super(struct super_block *sb) sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kvfree(sbi->raw_super); + kfree(sbi->raw_super); destroy_device_list(sbi); f2fs_destroy_xattr_caches(sbi); @@ -1259,7 +1259,7 @@ static void f2fs_put_super(struct super_block *sb) #ifdef CONFIG_UNICODE utf8_unload(sbi->s_encoding); #endif - kvfree(sbi); + kfree(sbi); } int f2fs_sync_fs(struct super_block *sb, int sync) @@ -3137,7 +3137,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, /* No valid superblock */ if (!*raw_super) - kvfree(super); + kfree(super); else err = 0; @@ -3816,11 +3816,11 @@ free_options: fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx); kvfree(options); free_sb_buf: - kvfree(raw_super); + kfree(raw_super); free_sbi: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - kvfree(sbi); + kfree(sbi); /* give only one another chance */ if (retry_cnt > 0 && skip_recovery) { From 6f6489288ed196a962ef8d33b1b9cf6369ff0807 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 15 Jun 2020 16:11:38 +0800 Subject: [PATCH 03/52] f2fs: remove useless truncate in f2fs_collapse_range() Since offset < new_size, no need to do truncate_pagecache() again with new_size. Signed-off-by: Wei Fang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3268f8dd59bb..98721f9bef25 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1373,8 +1373,6 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) truncate_pagecache(inode, offset); new_size = i_size_read(inode) - len; - truncate_pagecache(inode, new_size); - ret = f2fs_truncate_blocks(inode, new_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) From db5ae363292705d52c5b653d5079a07a6417ef4c Mon Sep 17 00:00:00 2001 From: Wuyun Zhao Date: Thu, 18 Jun 2020 10:58:37 +0800 Subject: [PATCH 04/52] f2fs: fix a race condition between f2fs_write_end_io and f2fs_del_fsync_node_entry Under some condition, the __write_node_page will submit a page which is not f2fs_in_warm_node_list and will not call f2fs_add_fsync_node_entry. f2fs_gc continue to run to invoke f2fs_iget -> do_read_inode to read the same node page and set code node, which make f2fs_in_warm_node_list become true, that will cause f2fs_bug_on in f2fs_del_fsync_node_entry when f2fs_write_end_io called. - f2fs_write_end_io - f2fs_iget - do_read_inode - set_cold_node recover cold node flag - f2fs_in_warm_node_list - is_cold_node if node is cold, assume we have added node to fsync_node_list during writepages() - f2fs_del_fsync_node_entry - f2fs_bug_on() due to node page is not in fsync_node_list [ 34.966133] Call trace: [ 34.969902] f2fs_del_fsync_node_entry+0x100/0x108 [ 34.976071] f2fs_write_end_io+0x1e0/0x288 [ 34.981539] bio_endio+0x248/0x270 [ 34.986289] blk_update_request+0x2b0/0x4d8 [ 34.991841] scsi_end_request+0x40/0x440 [ 34.997126] scsi_io_completion+0xa4/0x748 [ 35.002593] scsi_finish_command+0xdc/0x110 [ 35.008143] scsi_softirq_done+0x118/0x150 [ 35.013610] blk_done_softirq+0x8c/0xe8 [ 35.018811] __do_softirq+0x2e8/0x578 [ 35.023828] irq_exit+0xfc/0x120 [ 35.028398] handle_IPI+0x1d8/0x330 [ 35.033233] gic_handle_irq+0x110/0x1d4 [ 35.038433] el1_irq+0xb4/0x130 [ 35.042917] kmem_cache_alloc+0x3f0/0x418 [ 35.048288] radix_tree_node_alloc+0x50/0xf8 [ 35.053933] __radix_tree_create+0xf8/0x188 [ 35.059484] __radix_tree_insert+0x3c/0x128 [ 35.065035] add_gc_inode+0x90/0x118 [ 35.069967] f2fs_gc+0x1b80/0x2d70 [ 35.074718] f2fs_disable_checkpoint+0x94/0x1d0 [ 35.080621] f2fs_fill_super+0x10c4/0x1b88 [ 35.086088] mount_bdev+0x194/0x1e0 [ 35.090923] f2fs_mount+0x40/0x50 [ 35.095589] mount_fs+0xb4/0x190 [ 35.100159] vfs_kern_mount+0x80/0x1d8 [ 35.105260] do_mount+0x478/0xf18 [ 35.109926] ksys_mount+0x90/0xd0 [ 35.114592] __arm64_sys_mount+0x24/0x38 Signed-off-by: Wuyun Zhao Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 44582a4db513..33affa788588 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -402,6 +402,7 @@ static int do_read_inode(struct inode *inode) /* try to recover cold bit for non-dir inode */ if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { + f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_cold_node(node_page, false); set_page_dirty(node_page); } From da52f8ade40b032eb8111a0fd514c8ac5f8f0f1b Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Thu, 18 Jun 2020 12:37:10 +0800 Subject: [PATCH 05/52] f2fs: get the right gc victim section when section has several segments Assume each section has 4 segment: .___________________________. |_Segment0_|_..._|_Segment3_| . . . . .__________. |_section0_| Segment 0~2 has 0 valid block, segment 3 has 512 valid blocks. It will fail if we want to gc section0 in this scenes, because all 4 segments in section0 is not dirty. So we should use dirty section bitmap instead of dirty segment bitmap to get right victim section. Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 39 +++++++++++++++++------------- fs/f2fs/segment.c | 61 +++++++++++++++++++++++++++++++++++++++++++++-- fs/f2fs/segment.h | 8 +++++-- 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5b95d5a146eb..9b6fc61ce649 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -21,6 +21,9 @@ #include "gc.h" #include +static unsigned int count_bits(const unsigned long *addr, + unsigned int offset, unsigned int len); + static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; @@ -187,14 +190,20 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (p->alloc_mode == SSR) { p->gc_mode = GC_GREEDY; - p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->dirty_bitmap = dirty_i->dirty_segmap[type]; p->max_search = dirty_i->nr_dirty[type]; p->ofs_unit = 1; } else { p->gc_mode = select_gc_type(sbi, gc_type); - p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; - p->max_search = dirty_i->nr_dirty[DIRTY]; p->ofs_unit = sbi->segs_per_sec; + if (__is_large_section(sbi)) { + p->dirty_bitmap = dirty_i->dirty_secmap; + p->max_search = count_bits(p->dirty_bitmap, + 0, MAIN_SECS(sbi)); + } else { + p->dirty_bitmap = dirty_i->dirty_segmap[DIRTY]; + p->max_search = dirty_i->nr_dirty[DIRTY]; + } } /* @@ -365,10 +374,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } while (1) { - unsigned long cost; - unsigned int segno; + unsigned long cost, *dirty_bitmap; + unsigned int unit_no, segno; - segno = find_next_bit(p.dirty_segmap, last_segment, p.offset); + dirty_bitmap = p.dirty_bitmap; + unit_no = find_next_bit(dirty_bitmap, + last_segment / p.ofs_unit, + p.offset / p.ofs_unit); + segno = unit_no * p.ofs_unit; if (segno >= last_segment) { if (sm->last_victim[p.gc_mode]) { last_segment = @@ -381,14 +394,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } p.offset = segno + p.ofs_unit; - if (p.ofs_unit > 1) { - p.offset -= segno % p.ofs_unit; - nsearched += count_bits(p.dirty_segmap, - p.offset - p.ofs_unit, - p.ofs_unit); - } else { - nsearched++; - } + nsearched++; #ifdef CONFIG_F2FS_CHECK_FS /* @@ -421,9 +427,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, next: if (nsearched >= p.max_search) { if (!sm->last_victim[p.gc_mode] && segno <= last_victim) - sm->last_victim[p.gc_mode] = last_victim + 1; + sm->last_victim[p.gc_mode] = + last_victim + p.ofs_unit; else - sm->last_victim[p.gc_mode] = segno + 1; + sm->last_victim[p.gc_mode] = segno + p.ofs_unit; sm->last_victim[p.gc_mode] %= (MAIN_SECS(sbi) * sbi->segs_per_sec); break; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 196f31503511..66eeaba30e91 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -796,6 +796,18 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, } if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]++; + + if (__is_large_section(sbi)) { + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned short valid_blocks = + get_valid_blocks(sbi, segno, true); + + f2fs_bug_on(sbi, unlikely(!valid_blocks || + valid_blocks == BLKS_PER_SEC(sbi))); + + if (!IS_CURSEC(sbi, secno)) + set_bit(secno, dirty_i->dirty_secmap); + } } } @@ -803,6 +815,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned short valid_blocks; if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) dirty_i->nr_dirty[dirty_type]--; @@ -814,13 +827,26 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; - if (get_valid_blocks(sbi, segno, true) == 0) { + valid_blocks = get_valid_blocks(sbi, segno, true); + if (valid_blocks == 0) { clear_bit(GET_SEC_FROM_SEG(sbi, segno), dirty_i->victim_secmap); #ifdef CONFIG_F2FS_CHECK_FS clear_bit(segno, SIT_I(sbi)->invalid_segmap); #endif } + if (__is_large_section(sbi)) { + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + + if (!valid_blocks || + valid_blocks == BLKS_PER_SEC(sbi)) { + clear_bit(secno, dirty_i->dirty_secmap); + return; + } + + if (!IS_CURSEC(sbi, secno)) + set_bit(secno, dirty_i->dirty_secmap); + } } } @@ -4293,8 +4319,9 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno = 0, offset = 0; + unsigned int segno = 0, offset = 0, secno; unsigned short valid_blocks; + unsigned short blks_per_sec = BLKS_PER_SEC(sbi); while (1) { /* find dirty segment based on free segmap */ @@ -4313,6 +4340,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) __locate_dirty_segment(sbi, segno, DIRTY); mutex_unlock(&dirty_i->seglist_lock); } + + if (!__is_large_section(sbi)) + return; + + mutex_lock(&dirty_i->seglist_lock); + for (segno = 0; segno < MAIN_SECS(sbi); segno += blks_per_sec) { + valid_blocks = get_valid_blocks(sbi, segno, true); + secno = GET_SEC_FROM_SEG(sbi, segno); + + if (!valid_blocks || valid_blocks == blks_per_sec) + continue; + if (IS_CURSEC(sbi, secno)) + continue; + set_bit(secno, dirty_i->dirty_secmap); + } + mutex_unlock(&dirty_i->seglist_lock); } static int init_victim_secmap(struct f2fs_sb_info *sbi) @@ -4349,6 +4392,14 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) return -ENOMEM; } + if (__is_large_section(sbi)) { + bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); + dirty_i->dirty_secmap = f2fs_kvzalloc(sbi, + bitmap_size, GFP_KERNEL); + if (!dirty_i->dirty_secmap) + return -ENOMEM; + } + init_dirty_segmap(sbi); return init_victim_secmap(sbi); } @@ -4775,6 +4826,12 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) for (i = 0; i < NR_DIRTY_TYPE; i++) discard_dirty_segmap(sbi, i); + if (__is_large_section(sbi)) { + mutex_lock(&dirty_i->seglist_lock); + kvfree(dirty_i->dirty_secmap); + mutex_unlock(&dirty_i->seglist_lock); + } + destroy_victim_secmap(sbi); SM_I(sbi)->dirty_info = NULL; kvfree(dirty_i); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index cba16cca5189..f261e3e6a69b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -166,8 +166,11 @@ enum { struct victim_sel_policy { int alloc_mode; /* LFS or SSR */ int gc_mode; /* GC_CB or GC_GREEDY */ - unsigned long *dirty_segmap; /* dirty segment bitmap */ - unsigned int max_search; /* maximum # of segments to search */ + unsigned long *dirty_bitmap; /* dirty segment/section bitmap */ + unsigned int max_search; /* + * maximum # of segments/sections + * to search + */ unsigned int offset; /* last scanned bitmap offset */ unsigned int ofs_unit; /* bitmap search unit */ unsigned int min_cost; /* minimum cost */ @@ -266,6 +269,7 @@ enum dirty_type { struct dirty_seglist_info { const struct victim_selection *v_ops; /* victim selction operation */ unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + unsigned long *dirty_secmap; struct mutex seglist_lock; /* lock for segment bitmaps */ int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ unsigned long *victim_secmap; /* background GC victims */ From ba87a45c23d645b25e1265b477e83cc1b73086e4 Mon Sep 17 00:00:00 2001 From: Wang Xiaojun Date: Wed, 17 Jun 2020 20:30:12 +0800 Subject: [PATCH 06/52] f2fs: use kfree() to free variables allocated by match_strdup() Use kfree() instead of kvfree() to free variables allocated by match_strdup(). Because the memory is allocated with kmalloc inside match_strdup(). Signed-off-by: Wang Xiaojun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 426cebdd84e5..732652205737 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -348,7 +348,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, set_opt(sbi, QUOTA); return 0; errout: - kvfree(qname); + kfree(qname); return ret; } @@ -360,7 +360,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } - kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -494,10 +494,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } else if (!strcmp(name, "sync")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); @@ -654,17 +654,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) if (!strcmp(name, "adaptive")) { if (f2fs_sb_has_blkzoned(sbi)) { f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); - kvfree(name); + kfree(name); return -EINVAL; } F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (!strcmp(name, "lfs")) { F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_io_size_bits: if (args->from && match_int(args, &arg)) @@ -790,10 +790,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } else if (!strcmp(name, "fs-based")) { F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_alloc: name = match_strdup(&args[0]); @@ -805,10 +805,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } else if (!strcmp(name, "reuse")) { F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_fsync: name = match_strdup(&args[0]); @@ -822,10 +822,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_NOBARRIER; } else { - kvfree(name); + kfree(name); return -EINVAL; } - kvfree(name); + kfree(name); break; case Opt_test_dummy_encryption: ret = f2fs_set_test_dummy_encryption(sb, p, &args[0], @@ -1250,7 +1250,7 @@ static void f2fs_put_super(struct super_block *sb) mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kvfree(F2FS_OPTION(sbi).s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx); destroy_percpu_info(sbi); @@ -1754,7 +1754,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) GFP_KERNEL); if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kvfree(org_mount_opt.s_qf_names[j]); + kfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { @@ -1879,7 +1879,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kvfree(org_mount_opt.s_qf_names[i]); + kfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | @@ -1900,7 +1900,7 @@ restore_opts: #ifdef CONFIG_QUOTA F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kvfree(F2FS_OPTION(sbi).s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif @@ -3811,7 +3811,7 @@ free_bio_info: free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kvfree(F2FS_OPTION(sbi).s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif fscrypt_free_dummy_context(&F2FS_OPTION(sbi).dummy_enc_ctx); kvfree(options); From 0ef818335f734bbb4bb26b0eaab2c27aa6e5d2c2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Jun 2020 14:36:22 +0800 Subject: [PATCH 07/52] f2fs: add prefix for exported symbols to avoid polluting global symbol namespace. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 4 ++-- fs/f2fs/data.c | 14 +++++++------- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/file.c | 4 ++-- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4dca9e456734..7dbd56abe936 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -949,7 +949,7 @@ retry: } if (prealloc) { - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); set_new_dnode(&dn, cc->inode, NULL, NULL, 0); @@ -964,7 +964,7 @@ retry: break; } - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); } if (likely(!ret)) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 326c63879ddc..c78ce08f6400 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1426,7 +1426,7 @@ map_blocks: return err; } -void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) @@ -1491,7 +1491,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, next_dnode: if (map->m_may_create) - __do_map_lock(sbi, flag, true); + f2fs_do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -1640,7 +1640,7 @@ skip: f2fs_put_dnode(&dn); if (map->m_may_create) { - __do_map_lock(sbi, flag, false); + f2fs_do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; @@ -1666,7 +1666,7 @@ sync_out: f2fs_put_dnode(&dn); unlock_out: if (map->m_may_create) { - __do_map_lock(sbi, flag, false); + f2fs_do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } out: @@ -3217,7 +3217,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - __do_map_lock(sbi, flag, true); + f2fs_do_map_lock(sbi, flag, true); locked = true; } @@ -3254,7 +3254,7 @@ restart: err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); locked = true; @@ -3270,7 +3270,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - __do_map_lock(sbi, flag, false); + f2fs_do_map_lock(sbi, flag, false); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b35a50f4953c..9b6f7f72923a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3325,7 +3325,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); -void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); @@ -3448,7 +3448,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size); int f2fs_do_write_data_page(struct f2fs_io_info *fio); -void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); +void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 98721f9bef25..e15d4ce4c08e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -105,11 +105,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) if (need_alloc) { /* block allocation */ - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_block(&dn, page->index); f2fs_put_dnode(&dn); - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); } #ifdef CONFIG_F2FS_FS_COMPRESSION diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9b6fc61ce649..cb18a99a7f87 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1441,7 +1441,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, /* Move out cursegs from the target range */ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) - allocate_segment_for_resize(sbi, type, start, end); + f2fs_allocate_segment_for_resize(sbi, type, start, end); /* do GC to move out valid blocks in the range */ for (segno = start; segno <= end; segno += sbi->segs_per_sec) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 66eeaba30e91..cf693a018bec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2700,7 +2700,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } -void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end) { struct curseg_info *curseg = CURSEG_I(sbi, type); From fa6795552ad20509ffb1cc9ed1246b6b337f5da5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 Jun 2020 11:20:28 +0800 Subject: [PATCH 08/52] f2fs: fix to document reserved special compression extension There is one reserved special compression extension: '*', which could be set via 'compress_extension="*"' mount option to enable compression for all files. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 099d45ac8d8f..535021c46260 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -258,6 +258,8 @@ compress_extension=%s Support adding specified extension, so that f2fs can enab on compression extension list and enable compression on these file by default rather than to enable it via ioctl. For other files, we can still enable compression via ioctl. + Note that, there is one reserved special extension '*', it + can be set to enable compression for all files. ====================== ============================================================ Debugfs Entries From 79963d967b492876fa17c8c2c2c17b7438683d9b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Jun 2020 14:36:23 +0800 Subject: [PATCH 09/52] f2fs: shrink node_write lock coverage - to avoid race between checkpoint and quota file writeback, it just needs to hold read lock of node_write in writeback path. - node_write lock has covered all LFS data write paths, it's not necessary, we only need to hold node_write lock at write path of quota file. This refactors commit ca7f76e68074 ("f2fs: fix wrong discard space"). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 18 +++++++++++++++--- fs/f2fs/data.c | 12 ++++++++++++ fs/f2fs/segment.c | 11 ----------- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7dbd56abe936..5643aa2b8377 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1096,8 +1096,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, loff_t psize; int i, err; - if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi)) + if (IS_NOQUOTA(inode)) { + /* + * We need to wait for node_write to avoid block allocation during + * checkpoint. This can only happen to quota writes which can cause + * the below discard race condition. + */ + down_read(&sbi->node_write); + } else if (!f2fs_trylock_op(sbi)) { return -EAGAIN; + } set_new_dnode(&dn, cc->inode, NULL, NULL, 0); @@ -1203,7 +1211,9 @@ unlock_continue: set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); f2fs_put_dnode(&dn); - if (!IS_NOQUOTA(inode)) + if (IS_NOQUOTA(inode)) + up_read(&sbi->node_write); + else f2fs_unlock_op(sbi); spin_lock(&fi->i_size_lock); @@ -1230,7 +1240,9 @@ out_put_cic: out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: - if (!IS_NOQUOTA(inode)) + if (IS_NOQUOTA(inode)) + up_read(&sbi->node_write); + else f2fs_unlock_op(sbi); return -EAGAIN; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c78ce08f6400..cbdf062d3562 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2719,8 +2719,20 @@ write: /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { + /* + * We need to wait for node_write to avoid block allocation during + * checkpoint. This can only happen to quota writes which can cause + * the below discard race condition. + */ + if (IS_NOQUOTA(inode)) + down_read(&sbi->node_write); + fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); + + if (IS_NOQUOTA(inode)) + up_read(&sbi->node_write); + goto done; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cf693a018bec..2e3098f9118c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3133,14 +3133,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, type = CURSEG_COLD_DATA; } - /* - * We need to wait for node_write to avoid block allocation during - * checkpoint. This can only happen to quota writes which can cause - * the below discard race condition. - */ - if (IS_DATASEG(type)) - down_write(&sbi->node_write); - down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); @@ -3206,9 +3198,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, up_read(&SM_I(sbi)->curseg_lock); - if (IS_DATASEG(type)) - up_write(&sbi->node_write); - if (put_pin_sem) up_read(&sbi->pin_sem); } From f608c38c59c6020bfde14af88630b8d7817003f9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Jun 2020 14:36:24 +0800 Subject: [PATCH 10/52] f2fs: clean up parameter of f2fs_allocate_data_block() Use validation of @fio to inidcate whether caller want to serialize IOs in io.io_list or not, then @add_list will be redundant, remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cbdf062d3562..dfd322515357 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1366,7 +1366,7 @@ alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, - &sum, seg_type, NULL, false); + &sum, seg_type, NULL); if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(sbi), old_blkaddr, old_blkaddr); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9b6f7f72923a..397cf813b527 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3350,7 +3350,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio, bool add_list); + struct f2fs_io_info *fio); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cb18a99a7f87..5e1368ce2389 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -866,7 +866,7 @@ static int move_data_block(struct inode *inode, block_t bidx, } f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, - &sum, CURSEG_COLD_DATA, NULL, false); + &sum, CURSEG_COLD_DATA, NULL); fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e3098f9118c..735c598c25ea 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3115,7 +3115,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio, bool add_list) + struct f2fs_io_info *fio) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -3183,7 +3183,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (F2FS_IO_ALIGNED(sbi)) fio->retry = false; - if (add_list) { + if (fio) { struct f2fs_bio_info *io; INIT_LIST_HEAD(&fio->list); @@ -3232,7 +3232,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) down_read(&fio->sbi->io_order_lock); reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type, fio, true); + &fio->new_blkaddr, sum, type, fio); if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(fio->sbi), fio->old_blkaddr, fio->old_blkaddr); From 97767500781fae9c53b7d227f99f1dbb0dfe9a4f Mon Sep 17 00:00:00 2001 From: Qilong Zhang Date: Sun, 28 Jun 2020 19:23:03 +0800 Subject: [PATCH 11/52] f2fs: add f2fs_gc exception handle in f2fs_ioc_gc_range When f2fs_ioc_gc_range performs multiple segments gc ops, the return value of f2fs_ioc_gc_range is determined by the last segment gc ops. If its ops failed, the f2fs_ioc_gc_range will be considered to be failed despite some of previous segments gc ops succeeded. Therefore, so we fix: Redefine the return value of getting victim ops and add exception handle for f2fs_gc. In particular, 1).if target has no valid block, it will go on. 2).if target sectoion has valid block(s), but it is current section, we will reminder the caller. Signed-off-by: Qilong Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 +++++ fs/f2fs/gc.c | 20 ++++++++++++++------ fs/f2fs/segment.c | 4 ++-- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e15d4ce4c08e..6ef2656c6171 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2525,6 +2525,11 @@ do_more: } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); + if (ret) { + if (ret == -EBUSY) + ret = -EAGAIN; + goto out; + } range.start += BLKS_PER_SEC(sbi); if (range.start <= end) goto do_more; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5e1368ce2389..6eec3b2d606d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -330,6 +330,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, unsigned int secno, last_victim; unsigned int last_segment; unsigned int nsearched = 0; + int ret = 0; mutex_lock(&dirty_i->seglist_lock); last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; @@ -341,12 +342,19 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = get_max_cost(sbi, &p); if (*result != NULL_SEGNO) { - if (get_valid_blocks(sbi, *result, false) && - !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + if (!get_valid_blocks(sbi, *result, false)) { + ret = -ENODATA; + goto out; + } + + if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + ret = -EBUSY; + else p.min_segno = *result; goto out; } + ret = -ENODATA; if (p.max_search == 0) goto out; @@ -447,6 +455,7 @@ got_result: else set_bit(secno, dirty_i->victim_secmap); } + ret = 0; } out: @@ -456,7 +465,7 @@ out: prefree_segments(sbi), free_segments(sbi)); mutex_unlock(&dirty_i->seglist_lock); - return (p.min_segno == NULL_SEGNO) ? 0 : 1; + return ret; } static const struct victim_selection default_v_ops = { @@ -1340,10 +1349,9 @@ gc_more: ret = -EINVAL; goto stop; } - if (!__get_victim(sbi, &segno, gc_type)) { - ret = -ENODATA; + ret = __get_victim(sbi, &segno, gc_type); + if (ret) goto stop; - } seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type); if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 735c598c25ea..5aa4158a1440 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2631,7 +2631,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) bool reversed = false; /* f2fs_need_SSR() already forces to do this */ - if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { + if (!v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { curseg->next_segno = segno; return 1; } @@ -2658,7 +2658,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) for (; cnt-- > 0; reversed ? i-- : i++) { if (i == type) continue; - if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { + if (!v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { curseg->next_segno = segno; return 1; } From 0759e2c151f47b1362b15d544aca903d4ed74f88 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 28 Jun 2020 10:58:44 +0800 Subject: [PATCH 12/52] f2fs: show more debug info for per-temperature log - Add to account and show per-log dirty_seg, full_seg and valid_blocks in debugfs. - reformat printed info. TYPE segno secno zoneno dirty_seg full_seg valid_blk - COLD data: 1523 1523 1523 1 0 399 - WARM data: 769 769 769 20 255 133098 - HOT data: 767 767 767 9 0 167 - Dir dnode: 22 22 22 3 0 70 - File dnode: 722 722 722 14 10 6505 - Indir nodes: 2 2 2 1 0 3 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 64 +++++++++++++++++++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 3 +++ 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0dbcb0f9c019..4276c0f79beb 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -174,6 +174,26 @@ static void update_general_status(struct f2fs_sb_info *sbi) for (i = META_CP; i < META_MAX; i++) si->meta_count[i] = atomic_read(&sbi->meta_count[i]); + for (i = 0; i < NO_CHECK_TYPE; i++) { + si->dirty_seg[i] = 0; + si->full_seg[i] = 0; + si->valid_blks[i] = 0; + } + + for (i = 0; i < MAIN_SEGS(sbi); i++) { + int blks = get_seg_entry(sbi, i)->valid_blocks; + int type = get_seg_entry(sbi, i)->type; + + if (!blks) + continue; + + if (blks == sbi->blocks_per_seg) + si->full_seg[type]++; + else + si->dirty_seg[type]++; + si->valid_blks[type] += blks; + } + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -329,30 +349,50 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); - seq_printf(s, " - COLD data: %d, %d, %d\n", + seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n", + "segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk"); + seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_COLD_DATA], si->cursec[CURSEG_COLD_DATA], - si->curzone[CURSEG_COLD_DATA]); - seq_printf(s, " - WARM data: %d, %d, %d\n", + si->curzone[CURSEG_COLD_DATA], + si->dirty_seg[CURSEG_COLD_DATA], + si->full_seg[CURSEG_COLD_DATA], + si->valid_blks[CURSEG_COLD_DATA]); + seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_WARM_DATA], si->cursec[CURSEG_WARM_DATA], - si->curzone[CURSEG_WARM_DATA]); - seq_printf(s, " - HOT data: %d, %d, %d\n", + si->curzone[CURSEG_WARM_DATA], + si->dirty_seg[CURSEG_WARM_DATA], + si->full_seg[CURSEG_WARM_DATA], + si->valid_blks[CURSEG_WARM_DATA]); + seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_HOT_DATA], si->cursec[CURSEG_HOT_DATA], - si->curzone[CURSEG_HOT_DATA]); - seq_printf(s, " - Dir dnode: %d, %d, %d\n", + si->curzone[CURSEG_HOT_DATA], + si->dirty_seg[CURSEG_HOT_DATA], + si->full_seg[CURSEG_HOT_DATA], + si->valid_blks[CURSEG_HOT_DATA]); + seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_HOT_NODE], si->cursec[CURSEG_HOT_NODE], - si->curzone[CURSEG_HOT_NODE]); - seq_printf(s, " - File dnode: %d, %d, %d\n", + si->curzone[CURSEG_HOT_NODE], + si->dirty_seg[CURSEG_HOT_NODE], + si->full_seg[CURSEG_HOT_NODE], + si->valid_blks[CURSEG_HOT_NODE]); + seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_WARM_NODE], si->cursec[CURSEG_WARM_NODE], - si->curzone[CURSEG_WARM_NODE]); - seq_printf(s, " - Indir nodes: %d, %d, %d\n", + si->curzone[CURSEG_WARM_NODE], + si->dirty_seg[CURSEG_WARM_NODE], + si->full_seg[CURSEG_WARM_NODE], + si->valid_blks[CURSEG_WARM_NODE]); + seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n", si->curseg[CURSEG_COLD_NODE], si->cursec[CURSEG_COLD_NODE], - si->curzone[CURSEG_COLD_NODE]); + si->curzone[CURSEG_COLD_NODE], + si->dirty_seg[CURSEG_COLD_NODE], + si->full_seg[CURSEG_COLD_NODE], + si->valid_blks[CURSEG_COLD_NODE]); seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n", si->main_area_segs - si->dirty_count - si->prefree_count - si->free_segs, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 397cf813b527..2c6646d1976e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3536,6 +3536,9 @@ struct f2fs_stat_info { int curseg[NR_CURSEG_TYPE]; int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int dirty_seg[NR_CURSEG_TYPE]; + unsigned int full_seg[NR_CURSEG_TYPE]; + unsigned int valid_blks[NR_CURSEG_TYPE]; unsigned int meta_count[META_MAX]; unsigned int segment_count[2]; From a6d601f30d3bae3429c39a4e0fa290e344b7c57f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 28 Jun 2020 10:58:17 +0800 Subject: [PATCH 13/52] f2fs: fix to wait page writeback before update Filesystem including f2fs should support stable page for special device like software raid, however there is one missing path that page could be updated while it is writeback state as below, fix this. - gc_node_segment - f2fs_move_node_page - __write_node_page - set_page_writeback - do_read_inode - f2fs_init_extent_tree - __f2fs_init_extent_tree i_ext->len = 0; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 18 +++++++++--------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/inode.c | 3 +-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e60078460ad1..686c68b98610 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -325,9 +325,10 @@ static void __drop_largest_extent(struct extent_tree *et, } /* return true, if inode page is changed */ -static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL; struct extent_tree *et; struct extent_node *en; struct extent_info ei; @@ -335,16 +336,18 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e if (!f2fs_may_extent_tree(inode)) { /* drop largest extent */ if (i_ext && i_ext->len) { + f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; - return true; + set_page_dirty(ipage); + return; } - return false; + return; } et = __grab_extent_tree(inode); if (!i_ext || !i_ext->len) - return false; + return; get_extent_info(&ei, i_ext); @@ -360,17 +363,14 @@ static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_e } out: write_unlock(&et->lock); - return false; } -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage) { - bool ret = __f2fs_init_extent_tree(inode, i_ext); + __f2fs_init_extent_tree(inode, ipage); if (!F2FS_I(inode)->extent_tree) set_inode_flag(inode, FI_NO_EXTENT); - - return ret; } static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c6646d1976e..70565d81320b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3798,7 +3798,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root_cached *root); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); +void f2fs_init_extent_tree(struct inode *inode, struct page *ipage); void f2fs_drop_extent_tree(struct inode *inode); unsigned int f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 33affa788588..66969ae852b9 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -367,8 +367,7 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - if (f2fs_init_extent_tree(inode, &ri->i_ext)) - set_page_dirty(node_page); + f2fs_init_extent_tree(inode, node_page); get_inline_info(inode, ri); From e5cc2c5563a4ac47c426e8d5d084d44fbecb68aa Mon Sep 17 00:00:00 2001 From: Liu Song Date: Thu, 25 Jun 2020 20:40:11 +0800 Subject: [PATCH 14/52] f2fs: fix typo in comment of f2fs_do_add_link stakable/stackable Signed-off-by: Liu Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index d35976785e8c..069f498af1e3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -779,7 +779,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, return err; /* - * An immature stakable filesystem shows a race condition between lookup + * An immature stackable filesystem shows a race condition between lookup * and create. If we have same task when doing lookup and create, it's * definitely fine as expected by VFS normally. Otherwise, let's just * verify on-disk dentry one more time, which guarantees filesystem From b815bdc7817d5a27410a33475f3401bd47f5854e Mon Sep 17 00:00:00 2001 From: Liu Song Date: Sun, 28 Jun 2020 21:48:13 +0800 Subject: [PATCH 15/52] f2fs: remove useless parameter of __insert_free_nid() In current version, @state will only be FREE_NID. This parameter has no real effect so remove it to keep clean. Signed-off-by: Liu Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 03e24df1c84f..b4f4b0d77553 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2108,7 +2108,7 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, } static int __insert_free_nid(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_state state) + struct free_nid *i) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -2116,10 +2116,8 @@ static int __insert_free_nid(struct f2fs_sb_info *sbi, if (err) return err; - f2fs_bug_on(sbi, state != i->state); - nm_i->nid_cnt[state]++; - if (state == FREE_NID) - list_add_tail(&i->list, &nm_i->free_nid_list); + nm_i->nid_cnt[FREE_NID]++; + list_add_tail(&i->list, &nm_i->free_nid_list); return 0; } @@ -2241,7 +2239,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, } } ret = true; - err = __insert_free_nid(sbi, i, FREE_NID); + err = __insert_free_nid(sbi, i); err_out: if (update) { update_free_nid_bitmap(sbi, nid, ret, build); From 250e84d725479914aa6f3feec1191c5f1ea3c057 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 28 Jun 2020 20:29:38 +0800 Subject: [PATCH 16/52] f2fs: fix wrong return value of f2fs_bmap_compress() If compression is disable, we should return zero rather than -EOPNOTSUPP to indicate f2fs_bmap() is not supported. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dfd322515357..91dc7b598961 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3703,10 +3703,9 @@ static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) } f2fs_put_dnode(&dn); - return blknr; #else - return -EOPNOTSUPP; + return 0; #endif } From b79b0a310bca6dbe86d8adc1ca6a88e818bc1f19 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 29 Jun 2020 20:13:12 +0800 Subject: [PATCH 17/52] f2fs: support to trace f2fs_bmap() to show f2fs_bmap()'s result as below: f2fs_bmap: dev = (251,0), ino = 7, lblock:0, pblock:396800 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 +++++++++++--- include/trace/events/f2fs.h | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 91dc7b598961..c07a50e4d967 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3713,18 +3713,26 @@ static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; + struct buffer_head tmp = { + .b_size = i_blocksize(inode), + }; + sector_t blknr = 0; if (f2fs_has_inline_data(inode)) - return 0; + goto out; /* make sure allocating whole blocks */ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) filemap_write_and_wait(mapping); if (f2fs_compressed_file(inode)) - return f2fs_bmap_compress(inode, block); + blknr = f2fs_bmap_compress(inode, block); - return generic_block_bmap(mapping, block, get_data_block_bmap); + if (!get_data_block_bmap(inode, block, &tmp, 0)) + blknr = tmp.b_blocknr; +out: + trace_f2fs_bmap(inode, block, blknr); + return blknr; } #ifdef CONFIG_MIGRATION diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 8639ab962a71..2e8713a52d76 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1891,6 +1891,32 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio) ); +TRACE_EVENT(f2fs_bmap, + + TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock), + + TP_ARGS(inode, lblock, pblock), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(sector_t, lblock) + __field(sector_t, pblock) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->lblock = lblock; + __entry->pblock = pblock; + ), + + TP_printk("dev = (%d,%d), ino = %lu, lblock:%lld, pblock:%lld", + show_dev_ino(__entry), + (unsigned long long)__entry->lblock, + (unsigned long long)__entry->pblock) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ From dd5a09bd05ae57a5df29dbeea84783081d47bdd1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 29 Jun 2020 20:13:13 +0800 Subject: [PATCH 18/52] f2fs: support to trace f2fs_fiemap() to show f2fs_fiemap()'s result as below: f2fs_fiemap: dev = (251,0), ino = 7, lblock:0, pblock:1625292800, len:2097152, flags:0, ret:0 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/inline.c | 2 ++ include/trace/events/f2fs.h | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c07a50e4d967..995cf78b23c5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1813,6 +1813,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_LAST; err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + trace_f2fs_fiemap(inode, 0, phys, len, flags, err); if (err || err == 1) return err; } @@ -1836,8 +1837,10 @@ static int f2fs_xattr_fiemap(struct inode *inode, flags = FIEMAP_EXTENT_LAST; } - if (phys) + if (phys) { err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + trace_f2fs_fiemap(inode, 0, phys, len, flags, err); + } return (err < 0 ? err : 0); } @@ -1931,6 +1934,7 @@ next: ret = fiemap_fill_next_extent(fieinfo, logical, phys, size, flags); + trace_f2fs_fiemap(inode, logical, phys, size, flags, ret); if (ret) goto out; size = 0; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index dbade310dc79..def4b8481883 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -12,6 +12,7 @@ #include "f2fs.h" #include "node.h" +#include bool f2fs_may_inline_data(struct inode *inode) { @@ -776,6 +777,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, byteaddr += (char *)inline_data_addr(inode, ipage) - (char *)F2FS_INODE(ipage); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); + trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: f2fs_put_page(ipage, 1); return err; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 2e8713a52d76..8a1c1311acac 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1917,6 +1917,43 @@ TRACE_EVENT(f2fs_bmap, (unsigned long long)__entry->pblock) ); +TRACE_EVENT(f2fs_fiemap, + + TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock, + unsigned long long len, unsigned int flags, int ret), + + TP_ARGS(inode, lblock, pblock, len, flags, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(sector_t, lblock) + __field(sector_t, pblock) + __field(unsigned long long, len) + __field(unsigned int, flags) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->lblock = lblock; + __entry->pblock = pblock; + __entry->len = len; + __entry->flags = flags; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, lblock:%lld, pblock:%lld, " + "len:%llu, flags:%u, ret:%d", + show_dev_ino(__entry), + (unsigned long long)__entry->lblock, + (unsigned long long)__entry->pblock, + __entry->len, + __entry->flags, + __entry->ret) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ From d078319d069ef437f8f5d840de13f7d939d26c83 Mon Sep 17 00:00:00 2001 From: Wang Xiaojun Date: Tue, 16 Jun 2020 10:39:24 +0800 Subject: [PATCH 19/52] f2fs: remove the unused compr parameter The parameter compr is unused in the f2fs_cluster_blocks function so we no longer need to pass it as a parameter. Signed-off-by: Wang Xiaojun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 5643aa2b8377..cde6868c77bb 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -821,7 +821,7 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc) } /* return # of valid blocks in compressed cluster */ -static int f2fs_cluster_blocks(struct compress_ctx *cc, bool compr) +static int f2fs_cluster_blocks(struct compress_ctx *cc) { return __f2fs_cluster_blocks(cc, false); } @@ -835,7 +835,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, }; - return f2fs_cluster_blocks(&cc, false); + return f2fs_cluster_blocks(&cc); } static bool cluster_may_compress(struct compress_ctx *cc) @@ -886,7 +886,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, bool prealloc; retry: - ret = f2fs_cluster_blocks(cc, false); + ret = f2fs_cluster_blocks(cc); if (ret <= 0) return ret; From eb1353cfa9c1e9415b03dc117f8399969fa02102 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 Jun 2020 17:14:19 +0800 Subject: [PATCH 20/52] f2fs: fix to check page dirty status before writeback In f2fs_write_raw_pages(), we need to check page dirty status before writeback, because there could be a racer (e.g. reclaimer) helps writebacking the dirty page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index cde6868c77bb..dcc9f807a647 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1322,6 +1322,12 @@ retry_write: congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); lock_page(cc->rpages[i]); + + if (!PageDirty(cc->rpages[i])) { + unlock_page(cc->rpages[i]); + continue; + } + clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; } From 9a99c17dab8ad89ccf77ced5c7f34d840fda7595 Mon Sep 17 00:00:00 2001 From: Lihong Kou Date: Sat, 20 Jun 2020 10:12:17 +0800 Subject: [PATCH 21/52] f2fs: make trace enter and end in pairs for unlink In the f2fs_unlink we do not add trace end for some error paths, just add. Signed-off-by: Lihong Kou Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e94e02c6580a..a15a2831d43b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -569,15 +569,17 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto fail; + } err = dquot_initialize(dir); if (err) - return err; + goto fail; err = dquot_initialize(inode); if (err) - return err; + goto fail; de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { From 29b993c7cd2055ffff65f4ffcaa2fc8a5cb2403e Mon Sep 17 00:00:00 2001 From: Yu Changchun Date: Sat, 20 Jun 2020 03:58:29 -0400 Subject: [PATCH 22/52] f2fs: fix an oops in f2fs_is_compressed_page This patch is to fix a crash: #3 [ffffb6580689f898] oops_end at ffffffffa2835bc2 #4 [ffffb6580689f8b8] no_context at ffffffffa28766e7 #5 [ffffb6580689f920] async_page_fault at ffffffffa320135e [exception RIP: f2fs_is_compressed_page+34] RIP: ffffffffa2ba83a2 RSP: ffffb6580689f9d8 RFLAGS: 00010213 RAX: 0000000000000001 RBX: fffffc0f50b34bc0 RCX: 0000000000002122 RDX: 0000000000002123 RSI: 0000000000000c00 RDI: fffffc0f50b34bc0 RBP: ffff97e815a40178 R8: 0000000000000000 R9: ffff97e83ffc9000 R10: 0000000000032300 R11: 0000000000032380 R12: ffffb6580689fa38 R13: fffffc0f50b34bc0 R14: ffff97e825cbd000 R15: 0000000000000c00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #6 [ffffb6580689f9d8] __is_cp_guaranteed at ffffffffa2b7ea98 #7 [ffffb6580689f9f0] f2fs_submit_page_write at ffffffffa2b81a69 #8 [ffffb6580689fa30] f2fs_do_write_meta_page at ffffffffa2b99777 #9 [ffffb6580689fae0] __f2fs_write_meta_page at ffffffffa2b75f1a #10 [ffffb6580689fb18] f2fs_sync_meta_pages at ffffffffa2b77466 #11 [ffffb6580689fc98] do_checkpoint at ffffffffa2b78e46 #12 [ffffb6580689fd88] f2fs_write_checkpoint at ffffffffa2b79c29 #13 [ffffb6580689fdd0] f2fs_sync_fs at ffffffffa2b69d95 #14 [ffffb6580689fe20] sync_filesystem at ffffffffa2ad2574 #15 [ffffb6580689fe30] generic_shutdown_super at ffffffffa2a9b582 #16 [ffffb6580689fe48] kill_block_super at ffffffffa2a9b6d1 #17 [ffffb6580689fe60] kill_f2fs_super at ffffffffa2b6abe1 #18 [ffffb6580689fea0] deactivate_locked_super at ffffffffa2a9afb6 #19 [ffffb6580689feb8] cleanup_mnt at ffffffffa2abcad4 #20 [ffffb6580689fee0] task_work_run at ffffffffa28bca28 #21 [ffffb6580689ff00] exit_to_usermode_loop at ffffffffa28050b7 #22 [ffffb6580689ff38] do_syscall_64 at ffffffffa280560e #23 [ffffb6580689ff50] entry_SYSCALL_64_after_hwframe at ffffffffa320008c This occurred when umount f2fs if enable F2FS_FS_COMPRESSION with F2FS_IO_TRACE. Fixes it by adding IS_IO_TRACED_PAGE to check validity of pid for page_private. Signed-off-by: Yu Changchun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++++++ fs/f2fs/f2fs.h | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index dcc9f807a647..b3ecf7d4d6d8 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -49,6 +49,13 @@ bool f2fs_is_compressed_page(struct page *page) return false; if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page)) return false; + /* + * page->private may be set with pid. + * pid_max is enough to check if it is traced. + */ + if (IS_IO_TRACED_PAGE(page)) + return false; + f2fs_bug_on(F2FS_M_SB(page->mapping), *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); return true; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 70565d81320b..874b793ab00e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1313,6 +1313,14 @@ enum fsync_mode { #define IS_DUMMY_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) +#ifdef CONFIG_F2FS_IO_TRACE +#define IS_IO_TRACED_PAGE(page) \ + (page_private(page) > 0 && \ + page_private(page) < (unsigned long)PID_MAX_LIMIT) +#else +#define IS_IO_TRACED_PAGE(page) (0) +#endif + #ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) \ (unlikely(F2FS_OPTION(sbi).dummy_enc_ctx.ctx != NULL)) From 9039d8355d6e37647b31a42122a26f1858a2470a Mon Sep 17 00:00:00 2001 From: Yubo Feng Date: Sat, 20 Jun 2020 16:39:43 +0800 Subject: [PATCH 23/52] f2fs: lost matching-pair of trace in f2fs_truncate_inode_blocks if get_node_path() return -E2BIG and trace of f2fs_truncate_inode_blocks_enter/exit enabled then the matching-pair of trace_exit will lost in log. Signed-off-by: Yubo Feng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b4f4b0d77553..6551d5e35c05 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1041,8 +1041,10 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); - if (level < 0) + if (level < 0) { + trace_f2fs_truncate_inode_blocks_exit(inode, level); return level; + } page = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { From 901d745f8e6a61a835b12314d8b8a41df7012596 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Jun 2020 17:38:48 +0800 Subject: [PATCH 24/52] f2fs: split f2fs_allocate_new_segments() to two independent functions: - f2fs_allocate_new_segment() for specified type segment allocation - f2fs_allocate_new_segments() for all data type segments allocation Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 2 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 41 ++++++++++++++++++++++++----------------- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 874b793ab00e..4f1f1a3e3334 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3335,7 +3335,8 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type); +void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6ef2656c6171..1af37f3b3309 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1658,7 +1658,7 @@ next_alloc: map.m_seg_type = CURSEG_COLD_DATA_PINNED; f2fs_lock_op(sbi); - f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA); + f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA); f2fs_unlock_op(sbi); err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ae5310f02e7f..af974ba273b3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -742,7 +742,7 @@ next: f2fs_put_page(page, 1); } if (!err) - f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE); + f2fs_allocate_new_segments(sbi); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5aa4158a1440..c35614d255e1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2733,28 +2733,35 @@ unlock: up_read(&SM_I(sbi)->curseg_lock); } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type) +static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type) { - struct curseg_info *curseg; + struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; + + if (!curseg->next_blkoff && + !get_valid_blocks(sbi, curseg->segno, false) && + !get_ckpt_valid_blocks(sbi, curseg->segno)) + return; + + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + locate_dirty_segment(sbi, old_segno); +} + +void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type) +{ + down_write(&SIT_I(sbi)->sentry_lock); + __allocate_new_segment(sbi, type); + up_write(&SIT_I(sbi)->sentry_lock); +} + +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +{ int i; down_write(&SIT_I(sbi)->sentry_lock); - - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - if (type != NO_CHECK_TYPE && i != type) - continue; - - curseg = CURSEG_I(sbi, i); - if (type == NO_CHECK_TYPE || curseg->next_blkoff || - get_valid_blocks(sbi, curseg->segno, false) || - get_ckpt_valid_blocks(sbi, curseg->segno)) { - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_segno); - } - } - + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + __allocate_new_segment(sbi, i); up_write(&SIT_I(sbi)->sentry_lock); } From b7973091f0b205260fa819f85298f0ae1297cf84 Mon Sep 17 00:00:00 2001 From: Jia Yang Date: Wed, 1 Jul 2020 10:27:40 +0800 Subject: [PATCH 25/52] f2fs: add parameter op_flag in f2fs_submit_page_read() The parameter op_flag is not used in f2fs_get_read_data_page(), but it is used in f2fs_grab_read_bio(). Obviously, op_flag is not passed to f2fs_grab_read_bio() successfully. We need to add parameter in f2fs_submit_page_read() to pass it. The case: - gc_data_segment - f2fs_get_read_data_page(.., op_flag = REQ_RAHEAD,..) - f2fs_submit_page_read - f2fs_grab_read_bio(.., op_flag = 0, ..) Signed-off-by: Jia Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 995cf78b23c5..20f31ff47e05 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1025,12 +1025,13 @@ static void f2fs_release_read_bio(struct bio *bio) /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, - block_t blkaddr, bool for_write) + block_t blkaddr, int op_flags, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; - bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write); + bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, + page->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -1217,7 +1218,8 @@ got_it: return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write); + err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, + op_flags, for_write); if (err) goto put_err; return page; @@ -3398,7 +3400,7 @@ repeat: err = -EFSCORRUPTED; goto fail; } - err = f2fs_submit_page_read(inode, page, blkaddr, true); + err = f2fs_submit_page_read(inode, page, blkaddr, 0, true); if (err) goto fail; From d7cd3702ca95b1e957493de8f962b47a87b4c6aa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Jul 2020 10:27:09 +0800 Subject: [PATCH 26/52] f2fs: fix return value of move_data_block() If f2fs_grab_cache_page() fails, it needs to return -ENOMEM. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6eec3b2d606d..9a40761445d3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -849,8 +849,10 @@ static int move_data_block(struct inode *inode, block_t bidx, mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), fio.old_blkaddr, false); - if (!mpage) + if (!mpage) { + err = -ENOMEM; goto up_out; + } fio.encrypted_page = mpage; From 6b12367da22f1c9d4d0073f8d802f616c735f95b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 22 Jun 2020 23:01:05 -0700 Subject: [PATCH 27/52] f2fs: avoid readahead race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If two readahead threads having same offset enter in readpages, every read IOs are split and issued to the disk which giving lower bandwidth. This patch tries to avoid redundant readahead calls. Fixes one build error reported by Randy. Fix build error when F2FS_FS_COMPRESSION is not set/enabled. This label is needed in either case. ../fs/f2fs/data.c: In function ‘f2fs_mpage_readpages’: ../fs/f2fs/data.c:2327:5: error: label ‘next_page’ used but not defined goto next_page; Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 20 ++++++++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 2 ++ 3 files changed, 23 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 20f31ff47e05..44645f4f914b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2298,6 +2298,7 @@ static int f2fs_mpage_readpages(struct inode *inode, unsigned nr_pages = rac ? readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; int ret = 0; + bool drop_ra = false; map.m_pblk = 0; map.m_lblk = 0; @@ -2308,10 +2309,26 @@ static int f2fs_mpage_readpages(struct inode *inode, map.m_seg_type = NO_CHECK_TYPE; map.m_may_create = false; + /* + * Two readahead threads for same address range can cause race condition + * which fragments sequential read IOs. So let's avoid each other. + */ + if (rac && readahead_count(rac)) { + if (READ_ONCE(F2FS_I(inode)->ra_offset) == readahead_index(rac)) + drop_ra = true; + else + WRITE_ONCE(F2FS_I(inode)->ra_offset, + readahead_index(rac)); + } + for (; nr_pages; nr_pages--) { if (rac) { page = readahead_page(rac); prefetchw(&page->flags); + if (drop_ra) { + f2fs_put_page(page, 1); + continue; + } } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -2374,6 +2391,9 @@ next_page: } if (bio) __submit_bio(F2FS_I_SB(inode), bio, DATA); + + if (rac && readahead_count(rac) && !drop_ra) + WRITE_ONCE(F2FS_I(inode)->ra_offset, -1); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4f1f1a3e3334..c98a9182862b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -794,6 +794,7 @@ struct f2fs_inode_info { struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ + pgoff_t ra_offset; /* ongoing readahead offset */ struct extent_tree *extent_tree; /* cached extent_tree entry */ /* avoid racing between foreground op and gc */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 732652205737..80cb7cd358f8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1015,6 +1015,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; + fi->ra_offset = -1; + return &fi->vfs_inode; } From 0e5e81114de1c4ac32e36f057256653a04f27a33 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 2 Jul 2020 13:14:14 +0900 Subject: [PATCH 28/52] f2fs: add GC_URGENT_LOW mode in gc_urgent Added a new gc_urgent mode, GC_URGENT_LOW, in which mode F2FS will lower the bar of checking idle in order to process outstanding discard commands and GC a little bit aggressively. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 4 +++- fs/f2fs/f2fs.h | 10 ++++++++-- fs/f2fs/gc.c | 6 +++--- fs/f2fs/segment.c | 4 ++-- fs/f2fs/sysfs.c | 10 +++++++--- 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 4bb93a06d8ab..7f730c4c8df2 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -229,7 +229,9 @@ Date: August 2017 Contact: "Jaegeuk Kim" Description: Do background GC agressively when set. When gc_urgent = 1, background thread starts to do GC by given gc_urgent_sleep_time - interval. It is set to 0 by default. + interval. When gc_urgent = 2, F2FS will lower the bar of + checking idle in order to process outstanding discard commands + and GC a little bit aggressively. It is set to 0 by default. What: /sys/fs/f2fs//gc_urgent_sleep_time Date: August 2017 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c98a9182862b..b747223860d5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1268,7 +1268,8 @@ enum { GC_NORMAL, GC_IDLE_CB, GC_IDLE_GREEDY, - GC_URGENT, + GC_URGENT_HIGH, + GC_URGENT_LOW, }; enum { @@ -1525,6 +1526,7 @@ struct f2fs_sb_info { unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ + /* for skip statistic */ unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ @@ -2465,7 +2467,7 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { - if (sbi->gc_mode == GC_URGENT) + if (sbi->gc_mode == GC_URGENT_HIGH) return true; if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || @@ -2483,6 +2485,10 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) return false; + if (sbi->gc_mode == GC_URGENT_LOW && + (type == DISCARD_TIME || type == GC_TIME)) + return true; + return f2fs_time_over(sbi, type); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9a40761445d3..11b4adde9baf 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -82,7 +82,7 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (sbi->gc_mode == GC_URGENT) { + if (sbi->gc_mode == GC_URGENT_HIGH) { wait_ms = gc_th->urgent_sleep_time; down_write(&sbi->gc_lock); goto do_gc; @@ -176,7 +176,7 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) gc_mode = GC_CB; break; case GC_IDLE_GREEDY: - case GC_URGENT: + case GC_URGENT_HIGH: gc_mode = GC_GREEDY; break; } @@ -211,7 +211,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, * foreground GC and urgent GC cases. */ if (gc_type != FG_GC && - (sbi->gc_mode != GC_URGENT) && + (sbi->gc_mode != GC_URGENT_HIGH) && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c35614d255e1..40dd29cfc83d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -174,7 +174,7 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) if (f2fs_lfs_mode(sbi)) return false; - if (sbi->gc_mode == GC_URGENT) + if (sbi->gc_mode == GC_URGENT_HIGH) return true; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; @@ -1759,7 +1759,7 @@ static int issue_discard_thread(void *data) continue; } - if (sbi->gc_mode == GC_URGENT) + if (sbi->gc_mode == GC_URGENT_HIGH) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e877c59b9fdb..2a140657fc4d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -350,16 +350,20 @@ out: return -EINVAL; if (!strcmp(a->attr.name, "gc_urgent")) { - if (t >= 1) { - sbi->gc_mode = GC_URGENT; + if (t == 0) { + sbi->gc_mode = GC_NORMAL; + } else if (t == 1) { + sbi->gc_mode = GC_URGENT_HIGH; if (sbi->gc_thread) { sbi->gc_thread->gc_wake = 1; wake_up_interruptible_all( &sbi->gc_thread->gc_wait_queue_head); wake_up_discard_thread(sbi, true); } + } else if (t == 2) { + sbi->gc_mode = GC_URGENT_LOW; } else { - sbi->gc_mode = GC_NORMAL; + return -EINVAL; } return count; } From ffcde4b29a5f20ddca6fe559b48f345818bf1d91 Mon Sep 17 00:00:00 2001 From: Dehe Gu Date: Fri, 3 Jul 2020 17:51:29 +0800 Subject: [PATCH 29/52] f2fs: remove write attribute of main_blkaddr sysfs node Fuzzing main_blkaddr sysfs node will corrupt this field's value, causing kernel panic, remove its write attribute to avoid potential security risk. [Chao Yu: add description] Signed-off-by: Dehe Gu Signed-off-by: Daiyue Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 2a140657fc4d..50524401c8e6 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -223,6 +223,13 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, } #endif +static ssize_t main_blkaddr_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)MAIN_BLKADDR(sbi)); +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -526,7 +533,6 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -569,6 +575,7 @@ F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); +F2FS_GENERAL_RO_ATTR(main_blkaddr); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); From f567adb0342e125903cbe77adcf1e09b53acda1f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 3 Jul 2020 16:40:11 +0800 Subject: [PATCH 30/52] f2fs: fix to wait GCed compressed page writeback like we did for encrypted page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index b3ecf7d4d6d8..3a847bc36748 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1152,6 +1152,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, f2fs_set_compressed_page(cc->cpages[i], inode, cc->rpages[i + 1]->index, cic); fio.compressed_page = cc->cpages[i]; + + fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page, + dn.ofs_in_node + i + 1); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, fio.old_blkaddr); + if (fio.encrypted) { fio.page = cc->rpages[i + 1]; err = f2fs_encrypt_one_page(&fio); From 4fc781a3eb3b4955c03c2009e033da3b5f5adb5d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 3 Jul 2020 16:39:09 +0800 Subject: [PATCH 31/52] f2fs: fix wrong description of compress feature in f2fs.rst f2fs will try compressing data in cluster only when "all logical blocks in cluster contain valid data" rather than "all logical blocks in file are valid". Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 535021c46260..de43239a3c31 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -739,8 +739,8 @@ Compression implementation - In order to eliminate write amplification during overwrite, F2FS only support compression on write-once file, data can be compressed only when - all logical blocks in file are valid and cluster compress ratio is lower - than specified threshold. + all logical blocks in cluster contain valid data and compress ratio of + cluster data is lower than specified threshold. - To enable compression on regular inode, there are three ways: From 9627a7b31f3c4ff8bc8f3be3683983ffe6eaebe6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 6 Jul 2020 18:23:36 +0800 Subject: [PATCH 32/52] f2fs: fix error path in do_recover_data() - don't panic kernel if f2fs_get_node_page() fails in f2fs_recover_inline_data() or f2fs_recover_inline_xattr(); - return error number of f2fs_truncate_blocks() to f2fs_recover_inline_data()'s caller; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/inline.c | 19 ++++++++++++------- fs/f2fs/node.c | 6 ++++-- fs/f2fs/recovery.c | 10 ++++++++-- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b747223860d5..f880b47a6671 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3302,7 +3302,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); -void f2fs_recover_inline_xattr(struct inode *inode, struct page *page); +int f2fs_recover_inline_xattr(struct inode *inode, struct page *page); int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, @@ -3769,7 +3769,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); int f2fs_write_inline_data(struct inode *inode, struct page *page); -bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); +int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, struct page **res_page); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index def4b8481883..102df444f623 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -254,7 +254,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) return 0; } -bool f2fs_recover_inline_data(struct inode *inode, struct page *npage) +int f2fs_recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode *ri = NULL; @@ -276,7 +276,8 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage) ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: ipage = f2fs_get_node_page(sbi, inode->i_ino); - f2fs_bug_on(sbi, IS_ERR(ipage)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true, true); @@ -289,21 +290,25 @@ process_inline: set_page_dirty(ipage); f2fs_put_page(ipage, 1); - return true; + return 1; } if (f2fs_has_inline_data(inode)) { ipage = f2fs_get_node_page(sbi, inode->i_ino); - f2fs_bug_on(sbi, IS_ERR(ipage)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); f2fs_truncate_inline_inode(inode, ipage, 0); clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (f2fs_truncate_blocks(inode, 0, false)) - return false; + int ret; + + ret = f2fs_truncate_blocks(inode, 0, false); + if (ret) + return ret; goto process_inline; } - return false; + return 0; } struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6551d5e35c05..85ebdd0e3e7c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2572,7 +2572,7 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) +int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; size_t inline_size; @@ -2580,7 +2580,8 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) struct f2fs_inode *ri; ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); ri = F2FS_INODE(page); if (ri->i_inline & F2FS_INLINE_XATTR) { @@ -2599,6 +2600,7 @@ void f2fs_recover_inline_xattr(struct inode *inode, struct page *page) update_inode: f2fs_update_inode(inode, ipage); f2fs_put_page(ipage, 1); + return 0; } int f2fs_recover_xattr_data(struct inode *inode, struct page *page) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index af974ba273b3..4f12ade6410a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -544,7 +544,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* step 1: recover xattr */ if (IS_INODE(page)) { - f2fs_recover_inline_xattr(inode, page); + err = f2fs_recover_inline_xattr(inode, page); + if (err) + goto out; } else if (f2fs_has_xattr_block(ofs_of_node(page))) { err = f2fs_recover_xattr_data(inode, page); if (!err) @@ -553,8 +555,12 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* step 2: recover inline data */ - if (f2fs_recover_inline_data(inode, page)) + err = f2fs_recover_inline_data(inode, page); + if (err) { + if (err == 1) + err = 0; goto out; + } /* step 3: recover data indices */ start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); From aff6fbbe8e1b62af381f443291fd15fd16e17805 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 7 Jul 2020 09:35:41 +0800 Subject: [PATCH 33/52] f2fs: don't keep meta inode pages used for compressed block migration meta inode's pages are used for encrypted, verity and compressed blocks, so the meta inode's cache invalidation condition in do_checkpoint() should consider compression as well, not just for verity and encryption, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 236064930251..8f5753cc3037 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1515,9 +1515,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* * invalidate intermediate page cache borrowed from meta inode which are - * used for migration of encrypted or verity inode's blocks. + * used for migration of encrypted, verity or compressed inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi)) + if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) || + f2fs_sb_has_compression(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); From 3357af8f1a09731b75628f72de76ea8e3c9cbd7e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 14 Jul 2020 15:18:12 -0700 Subject: [PATCH 34/52] f2fs: use generic names for generic ioctls Don't define F2FS_IOC_* aliases to ioctls that already have a generic FS_IOC_* name. These aliases are unnecessary, and they make it unclear which ioctls are f2fs-specific and which are generic. Signed-off-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 25 +---------------------- fs/f2fs/file.c | 54 +++++++++++++++++++++++++------------------------- 2 files changed, 28 insertions(+), 51 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f880b47a6671..163fd5fadf11 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -402,12 +402,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, } /* - * ioctl commands + * f2fs-specific ioctl commands */ -#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS -#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION - #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) @@ -435,13 +431,6 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ _IOR(F2FS_IOCTL_MAGIC, 19, __u64) -#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL -#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL - -#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY -#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY -#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT - /* * should be same as XFS_IOC_GOINGDOWN. * Flags for going down operation used by FS_IOC_GOINGDOWN @@ -453,18 +442,6 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ #define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ -#if defined(__KERNEL__) && defined(CONFIG_COMPAT) -/* - * ioctl commands in 32 bit emulation - */ -#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION -#endif - -#define F2FS_IOC_FSGETXATTR FS_IOC_FSGETXATTR -#define F2FS_IOC_FSSETXATTR FS_IOC_FSSETXATTR - struct f2fs_gc_range { u32 sync; u64 start; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1af37f3b3309..521987cd8772 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3362,7 +3362,7 @@ static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg) return fsverity_ioctl_measure(filp, (void __user *)arg); } -static int f2fs_get_volume_name(struct file *filp, unsigned long arg) +static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -3388,7 +3388,7 @@ static int f2fs_get_volume_name(struct file *filp, unsigned long arg) return err; } -static int f2fs_set_volume_name(struct file *filp, unsigned long arg) +static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -3767,11 +3767,11 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOSPC; switch (cmd) { - case F2FS_IOC_GETFLAGS: + case FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); - case F2FS_IOC_SETFLAGS: + case FS_IOC_SETFLAGS: return f2fs_ioc_setflags(filp, arg); - case F2FS_IOC_GETVERSION: + case FS_IOC_GETVERSION: return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: return f2fs_ioc_start_atomic_write(filp); @@ -3787,11 +3787,11 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_shutdown(filp, arg); case FITRIM: return f2fs_ioc_fitrim(filp, arg); - case F2FS_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: return f2fs_ioc_set_encryption_policy(filp, arg); - case F2FS_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_POLICY: return f2fs_ioc_get_encryption_policy(filp, arg); - case F2FS_IOC_GET_ENCRYPTION_PWSALT: + case FS_IOC_GET_ENCRYPTION_PWSALT: return f2fs_ioc_get_encryption_pwsalt(filp, arg); case FS_IOC_GET_ENCRYPTION_POLICY_EX: return f2fs_ioc_get_encryption_policy_ex(filp, arg); @@ -3819,9 +3819,9 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_flush_device(filp, arg); case F2FS_IOC_GET_FEATURES: return f2fs_ioc_get_features(filp, arg); - case F2FS_IOC_FSGETXATTR: + case FS_IOC_FSGETXATTR: return f2fs_ioc_fsgetxattr(filp, arg); - case F2FS_IOC_FSSETXATTR: + case FS_IOC_FSSETXATTR: return f2fs_ioc_fssetxattr(filp, arg); case F2FS_IOC_GET_PIN_FILE: return f2fs_ioc_get_pin_file(filp, arg); @@ -3835,10 +3835,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_enable_verity(filp, arg); case FS_IOC_MEASURE_VERITY: return f2fs_ioc_measure_verity(filp, arg); - case F2FS_IOC_GET_VOLUME_NAME: - return f2fs_get_volume_name(filp, arg); - case F2FS_IOC_SET_VOLUME_NAME: - return f2fs_set_volume_name(filp, arg); + case FS_IOC_GETFSLABEL: + return f2fs_ioc_getfslabel(filp, arg); + case FS_IOC_SETFSLABEL: + return f2fs_ioc_setfslabel(filp, arg); case F2FS_IOC_GET_COMPRESS_BLOCKS: return f2fs_get_compress_blocks(filp, arg); case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: @@ -3969,14 +3969,14 @@ out: long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { - case F2FS_IOC32_GETFLAGS: - cmd = F2FS_IOC_GETFLAGS; + case FS_IOC32_GETFLAGS: + cmd = FS_IOC_GETFLAGS; break; - case F2FS_IOC32_SETFLAGS: - cmd = F2FS_IOC_SETFLAGS; + case FS_IOC32_SETFLAGS: + cmd = FS_IOC_SETFLAGS; break; - case F2FS_IOC32_GETVERSION: - cmd = F2FS_IOC_GETVERSION; + case FS_IOC32_GETVERSION: + cmd = FS_IOC_GETVERSION; break; case F2FS_IOC_START_ATOMIC_WRITE: case F2FS_IOC_COMMIT_ATOMIC_WRITE: @@ -3985,9 +3985,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_ABORT_VOLATILE_WRITE: case F2FS_IOC_SHUTDOWN: case FITRIM: - case F2FS_IOC_SET_ENCRYPTION_POLICY: - case F2FS_IOC_GET_ENCRYPTION_PWSALT: - case F2FS_IOC_GET_ENCRYPTION_POLICY: + case FS_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_PWSALT: + case FS_IOC_GET_ENCRYPTION_POLICY: case FS_IOC_GET_ENCRYPTION_POLICY_EX: case FS_IOC_ADD_ENCRYPTION_KEY: case FS_IOC_REMOVE_ENCRYPTION_KEY: @@ -4001,16 +4001,16 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_MOVE_RANGE: case F2FS_IOC_FLUSH_DEVICE: case F2FS_IOC_GET_FEATURES: - case F2FS_IOC_FSGETXATTR: - case F2FS_IOC_FSSETXATTR: + case FS_IOC_FSGETXATTR: + case FS_IOC_FSSETXATTR: case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: case F2FS_IOC_RESIZE_FS: case FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: - case F2FS_IOC_GET_VOLUME_NAME: - case F2FS_IOC_SET_VOLUME_NAME: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: case F2FS_IOC_GET_COMPRESS_BLOCKS: case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: From 02772fbfcba8597eef9d5c5f7f94087132d0c1d4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 20 Jul 2020 16:52:50 +0800 Subject: [PATCH 35/52] f2fs: compress: fix to avoid memory leak on cc->cpages Memory allocated for storing compressed pages' poitner should be released after f2fs_write_compressed_pages(), otherwise it will cause memory leak issue. Signed-off-by: Chao Yu Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 3a847bc36748..a20c9f3272af 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1385,6 +1385,8 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, err = f2fs_write_compressed_pages(cc, submitted, wbc, io_type); cops->destroy_compress_ctx(cc); + kfree(cc->cpages); + cc->cpages = NULL; if (!err) return 0; f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN); From 887e03739130a5fddadfd6dfe9323a6f117ef582 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 19 Jul 2020 17:13:44 -0700 Subject: [PATCH 36/52] f2fs: segment.h: delete a duplicated word Drop the repeated word "the" in a comment. Signed-off-by: Randy Dunlap Cc: Jaegeuk Kim Cc: Chao Yu Cc: linux-f2fs-devel@lists.sourceforge.net Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f261e3e6a69b..752b177073b2 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -187,7 +187,7 @@ struct seg_entry { unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */ #endif /* - * # of valid blocks and the validity bitmap stored in the the last + * # of valid blocks and the validity bitmap stored in the last * checkpoint pack. This information is used by the SSR mode. */ unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */ From b0f3b87fb3abc42c81d76c6c5795f26dbdb2f04b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Jul 2020 09:57:03 -0700 Subject: [PATCH 37/52] f2fs: should avoid inode eviction in synchronous path https://bugzilla.kernel.org/show_bug.cgi?id=208565 PID: 257 TASK: ecdd0000 CPU: 0 COMMAND: "init" #0 [] (__schedule) from [] #1 [] (schedule) from [] #2 [] (rwsem_down_read_failed) from [] #3 [] (down_read) from [] #4 [] (f2fs_truncate_blocks) from [] #5 [] (f2fs_truncate) from [] #6 [] (f2fs_evict_inode) from [] #7 [] (evict) from [] #8 [] (iput) from [] #9 [] (f2fs_sync_node_pages) from [] #10 [] (f2fs_write_checkpoint) from [] #11 [] (f2fs_sync_fs) from [] #12 [] (f2fs_do_sync_file) from [] #13 [] (f2fs_sync_file) from [] #14 [] (vfs_fsync_range) from [] #15 [] (do_fsync) from [] #16 [] (sys_fsync) from [] This can be caused by flush_dirty_inode() in f2fs_sync_node_pages() where iput() requires f2fs_lock_op() again resulting in livelock. Reported-by: Zhiguo Niu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 85ebdd0e3e7c..3ffe8d83f29f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1926,8 +1926,12 @@ continue_unlock: goto continue_unlock; } - /* flush inline_data, if it's async context. */ - if (do_balance && is_inline_node(page)) { + /* flush inline_data/inode, if it's async context. */ + if (!do_balance) + goto write_node; + + /* flush inline_data */ + if (is_inline_node(page)) { clear_inline_node(page); unlock_page(page); flush_inline_data(sbi, ino_of_node(page)); @@ -1940,7 +1944,7 @@ continue_unlock: if (flush_dirty_inode(page)) goto lock_node; } - +write_node: f2fs_wait_on_page_writeback(page, NODE, true, true); if (!clear_page_dirty_for_io(page)) From 9af846486d781a63de025a5f502c515268e48790 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 21 Jul 2020 12:21:11 +0900 Subject: [PATCH 38/52] f2fs: add F2FS_IOC_SEC_TRIM_FILE ioctl Added a new ioctl to send discard commands or/and zero out to selected data area of a regular file for security reason. The way of handling range.len of F2FS_IOC_SEC_TRIM_FILE: 1. Added -1 value support for range.len to secure trim the whole blocks starting from range.start regardless of i_size. 2. If the end of the range passes over the end of file, it means until the end of file (i_size). 3. ignored the case of that range.len is zero to prevent the function from making end_addr zero and triggering different behaviour of the function. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 15 ++++ fs/f2fs/file.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 163fd5fadf11..3d9dd32a176a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -430,6 +430,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, _IOR(F2FS_IOCTL_MAGIC, 18, __u64) #define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ _IOR(F2FS_IOCTL_MAGIC, 19, __u64) +#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, \ + struct f2fs_sectrim_range) /* * should be same as XFS_IOC_GOINGDOWN. @@ -442,6 +444,13 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ #define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ +/* + * Flags used by F2FS_IOC_SEC_TRIM_FILE + */ +#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */ +#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */ +#define F2FS_TRIM_FILE_MASK 0x3 + struct f2fs_gc_range { u32 sync; u64 start; @@ -465,6 +474,12 @@ struct f2fs_flush_device { u32 segments; /* # of segments to flush */ }; +struct f2fs_sectrim_range { + u64 start; + u64 len; + u64 flags; +}; + /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 static inline int get_extra_isize(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 521987cd8772..cc7f5670390f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -3759,6 +3760,193 @@ out: return ret; } +static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode, + pgoff_t off, block_t block, block_t len, u32 flags) +{ + struct request_queue *q = bdev_get_queue(bdev); + sector_t sector = SECTOR_FROM_BLOCK(block); + sector_t nr_sects = SECTOR_FROM_BLOCK(len); + int ret = 0; + + if (!q) + return -ENXIO; + + if (flags & F2FS_TRIM_FILE_DISCARD) + ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS, + blk_queue_secure_erase(q) ? + BLKDEV_DISCARD_SECURE : 0); + + if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) { + if (IS_ENCRYPTED(inode)) + ret = fscrypt_zeroout_range(inode, off, block, len); + else + ret = blkdev_issue_zeroout(bdev, sector, nr_sects, + GFP_NOFS, 0); + } + + return ret; +} + +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct address_space *mapping = inode->i_mapping; + struct block_device *prev_bdev = NULL; + struct f2fs_sectrim_range range; + pgoff_t index, pg_end, prev_index = 0; + block_t prev_block = 0, len = 0; + loff_t end_addr; + bool to_end = false; + int ret = 0; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg, + sizeof(range))) + return -EFAULT; + + if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) || + !S_ISREG(inode->i_mode)) + return -EINVAL; + + if (((range.flags & F2FS_TRIM_FILE_DISCARD) && + !f2fs_hw_support_discard(sbi)) || + ((range.flags & F2FS_TRIM_FILE_ZEROOUT) && + IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi))) + return -EOPNOTSUPP; + + file_start_write(filp); + inode_lock(inode); + + if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) || + range.start >= inode->i_size) { + ret = -EINVAL; + goto err; + } + + if (range.len == 0) + goto err; + + if (inode->i_size - range.start > range.len) { + end_addr = range.start + range.len; + } else { + end_addr = range.len == (u64)-1 ? + sbi->sb->s_maxbytes : inode->i_size; + to_end = true; + } + + if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) || + (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) { + ret = -EINVAL; + goto err; + } + + index = F2FS_BYTES_TO_BLK(range.start); + pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE); + + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto err; + + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); + + ret = filemap_write_and_wait_range(mapping, range.start, + to_end ? LLONG_MAX : end_addr - 1); + if (ret) + goto out; + + truncate_inode_pages_range(mapping, range.start, + to_end ? -1 : end_addr - 1); + + while (index < pg_end) { + struct dnode_of_data dn; + pgoff_t end_offset, count; + int i; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) { + index = f2fs_get_next_page_offset(&dn, index); + continue; + } + goto out; + } + + end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + count = min(end_offset - dn.ofs_in_node, pg_end - index); + for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { + struct block_device *cur_bdev; + block_t blkaddr = f2fs_data_blkaddr(&dn); + + if (!__is_valid_data_blkaddr(blkaddr)) + continue; + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) { + ret = -EFSCORRUPTED; + f2fs_put_dnode(&dn); + goto out; + } + + cur_bdev = f2fs_target_device(sbi, blkaddr, NULL); + if (f2fs_is_multi_device(sbi)) { + int di = f2fs_target_device_index(sbi, blkaddr); + + blkaddr -= FDEV(di).start_blk; + } + + if (len) { + if (prev_bdev == cur_bdev && + index == prev_index + len && + blkaddr == prev_block + len) { + len++; + } else { + ret = f2fs_secure_erase(prev_bdev, + inode, prev_index, prev_block, + len, range.flags); + if (ret) { + f2fs_put_dnode(&dn); + goto out; + } + + len = 0; + } + } + + if (!len) { + prev_bdev = cur_bdev; + prev_index = index; + prev_block = blkaddr; + len = 1; + } + } + + f2fs_put_dnode(&dn); + + if (fatal_signal_pending(current)) { + ret = -EINTR; + goto out; + } + cond_resched(); + } + + if (len) + ret = f2fs_secure_erase(prev_bdev, inode, prev_index, + prev_block, len, range.flags); +out: + up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); +err: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3845,6 +4033,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_release_compress_blocks(filp, arg); case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: return f2fs_reserve_compress_blocks(filp, arg); + case F2FS_IOC_SEC_TRIM_FILE: + return f2fs_sec_trim_file(filp, arg); default: return -ENOTTY; } @@ -4014,6 +4204,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_COMPRESS_BLOCKS: case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: + case F2FS_IOC_SEC_TRIM_FILE: break; default: return -ENOIOCTLCMD; From 68e79baf41f89fcdf9edf9bf3d65a71b667d066c Mon Sep 17 00:00:00 2001 From: Jia Yang Date: Tue, 21 Jul 2020 11:49:14 +0800 Subject: [PATCH 39/52] f2fs: Change the type of f2fs_flush_inline_data() to void The return value of f2fs_flush_inline_data() is not used, so delete it. Signed-off-by: Jia Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3d9dd32a176a..927006d00a63 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3282,7 +3282,7 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *f2fs_get_node_page_ra(struct page *parent, int start); int f2fs_move_node_page(struct page *node_page, int gc_type); -int f2fs_flush_inline_data(struct f2fs_sb_info *sbi); +void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3ffe8d83f29f..07a4dc743416 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1816,12 +1816,11 @@ static bool flush_dirty_inode(struct page *page) return true; } -int f2fs_flush_inline_data(struct f2fs_sb_info *sbi) +void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) { pgoff_t index = 0; struct pagevec pvec; int nr_pages; - int ret = 0; pagevec_init(&pvec); @@ -1860,7 +1859,6 @@ continue_unlock: pagevec_release(&pvec); cond_resched(); } - return ret; } int f2fs_sync_node_pages(struct f2fs_sb_info *sbi, From 99c787cfd2bd04926f1f553b30bd7dcea2caaba1 Mon Sep 17 00:00:00 2001 From: Li Guifu Date: Fri, 24 Jul 2020 09:38:11 +0800 Subject: [PATCH 40/52] f2fs: fix use-after-free issue During umount, f2fs_put_super() unregisters procfs entries after f2fs_destroy_segment_manager(), it may cause use-after-free issue when umount races with procfs accessing, fix it by relocating f2fs_unregister_sysfs(). [Chao Yu: change commit title/message a bit] Signed-off-by: Li Guifu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 80cb7cd358f8..5e0a3eeb8ca4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1175,6 +1175,9 @@ static void f2fs_put_super(struct super_block *sb) int i; bool dropped; + /* unregister procfs/sysfs entries in advance to avoid race case */ + f2fs_unregister_sysfs(sbi); + f2fs_quota_off_umount(sb); /* prevent remaining shrinker jobs */ @@ -1240,8 +1243,6 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->ckpt); - f2fs_unregister_sysfs(sbi); - sb->s_fs_info = NULL; if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); From a87aff1d491f6d824d9cc53303544361d7eaae75 Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Fri, 24 Jul 2020 16:55:28 +0800 Subject: [PATCH 41/52] f2fs: space related cleanup Just for code style, no logic change 1. delete useless space 2. change spaces into tab Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/namei.c | 8 ++++---- fs/f2fs/node.c | 4 ++-- fs/f2fs/sysfs.c | 4 ++-- fs/f2fs/xattr.c | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8f5753cc3037..f86c6ba7cb82 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1415,7 +1415,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); } - /* 2 cp + n data seg summary + orphan inode blocks */ + /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false); spin_lock_irqsave(&sbi->cp_lock, flags); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 44645f4f914b..3b8409ce24f2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -86,7 +86,7 @@ static bool __is_cp_guaranteed(struct page *page) sbi = F2FS_I_SB(inode); if (inode->i_ino == F2FS_META_INO(sbi) || - inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode) || (S_ISREG(inode->i_mode) && (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || @@ -1146,7 +1146,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) { - struct extent_info ei = {0,0,0}; + struct extent_info ei = {0, 0, 0}; struct inode *inode = dn->inode; if (f2fs_lookup_extent_cache(inode, index, &ei)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 927006d00a63..3a14056c9cd8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1440,7 +1440,7 @@ struct f2fs_sb_info { unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ - struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ spinlock_t fsync_node_lock; /* for node entry lock */ struct list_head fsync_node_list; /* node list head */ @@ -1520,7 +1520,7 @@ struct f2fs_sb_info { unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ - unsigned int atomic_files; /* # of opened atomic file */ + unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a15a2831d43b..84e4bbc1a64d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -602,7 +602,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the - * negative dentries at f2fs_lookup(), when it is better + * negative dentries at f2fs_lookup(), when it is better * supported by the VFS for the CI case. */ if (IS_CASEFOLDED(dir)) @@ -1287,7 +1287,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, } const struct inode_operations f2fs_encrypted_symlink_inode_operations = { - .get_link = f2fs_encrypted_get_link, + .get_link = f2fs_encrypted_get_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, .listxattr = f2fs_listxattr, @@ -1313,7 +1313,7 @@ const struct inode_operations f2fs_dir_inode_operations = { }; const struct inode_operations f2fs_symlink_inode_operations = { - .get_link = f2fs_get_link, + .get_link = f2fs_get_link, .getattr = f2fs_getattr, .setattr = f2fs_setattr, .listxattr = f2fs_listxattr, @@ -1321,7 +1321,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { const struct inode_operations f2fs_special_inode_operations = { .getattr = f2fs_getattr, - .setattr = f2fs_setattr, + .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, .set_acl = f2fs_set_acl, .listxattr = f2fs_listxattr, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 07a4dc743416..9bbaa2614679 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1728,7 +1728,7 @@ continue_unlock: set_dentry_mark(page, f2fs_need_dentry_mark(sbi, ino)); } - /* may be written by other thread */ + /* may be written by other thread */ if (!PageDirty(page)) set_page_dirty(page); } @@ -2101,7 +2101,7 @@ const struct address_space_operations f2fs_node_aops = { .invalidatepage = f2fs_invalidate_page, .releasepage = f2fs_release_page, #ifdef CONFIG_MIGRATION - .migratepage = f2fs_migrate_page, + .migratepage = f2fs_migrate_page, #endif }; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 50524401c8e6..88ed9969cc86 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -27,7 +27,7 @@ enum { NM_INFO, /* struct f2fs_nm_info */ F2FS_SBI, /* struct f2fs_sb_info */ #ifdef CONFIG_F2FS_STAT_FS - STAT_INFO, /* struct f2fs_stat_info */ + STAT_INFO, /* struct f2fs_stat_info */ #endif #ifdef CONFIG_F2FS_FAULT_INJECTION FAULT_INFO_RATE, /* struct f2fs_fault_info */ @@ -717,7 +717,7 @@ static struct kobj_type f2fs_ktype = { }; static struct kset f2fs_kset = { - .kobj = {.ktype = &f2fs_ktype}, + .kobj = {.ktype = &f2fs_ktype}, }; static struct kobj_type f2fs_feat_ktype = { diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 4f6582ef7ee3..1b0736ce0918 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -175,8 +175,8 @@ const struct xattr_handler f2fs_xattr_trusted_handler = { const struct xattr_handler f2fs_xattr_advise_handler = { .name = F2FS_SYSTEM_ADVISE_NAME, .flags = F2FS_XATTR_INDEX_ADVISE, - .get = f2fs_xattr_advise_get, - .set = f2fs_xattr_advise_set, + .get = f2fs_xattr_advise_get, + .set = f2fs_xattr_advise_set, }; const struct xattr_handler f2fs_xattr_security_handler = { From 944dd22ea4475bd11180fd2f431a4a547ca4d8f5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 24 Jul 2020 18:21:36 +0800 Subject: [PATCH 42/52] f2fs: compress: fix to update isize when overwriting compressed file We missed to update isize of compressed file in write_end() with below case: cluster size is 16KB - write 14KB data from offset 0 - overwrite 16KB data from offset 0 Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3b8409ce24f2..bc89bc987513 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3470,6 +3470,10 @@ static int f2fs_write_end(struct file *file, if (f2fs_compressed_file(inode) && fsdata) { f2fs_compress_write_end(inode, fsdata, page->index, copied); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + + if (pos + copied > i_size_read(inode) && + !f2fs_verity_in_progress(inode)) + f2fs_i_size_write(inode, pos + copied); return copied; } #endif From b2f57a8e6bc987ba2c8defb507c98aff66c94fc5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Jul 2020 09:17:48 +0800 Subject: [PATCH 43/52] f2fs: compress: delay temp page allocation Currently, we allocate temp pages which is used to pad hole in cluster during read IO submission, it may take long time before releasing them in f2fs_decompress_pages(), since they are only used as temp output buffer in decompression context, so let's just do the allocation in that context to reduce time of memory pool resource occupation. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index a20c9f3272af..6e7db450006c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -670,6 +670,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; int ret; + int i; dec_page_count(sbi, F2FS_RD_DATA); @@ -688,6 +689,26 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) goto out_free_dic; } + dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->cluster_size, GFP_NOFS); + if (!dic->tpages) { + ret = -ENOMEM; + goto out_free_dic; + } + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) { + dic->tpages[i] = dic->rpages[i]; + continue; + } + + dic->tpages[i] = f2fs_compress_alloc_page(); + if (!dic->tpages[i]) { + ret = -ENOMEM; + goto out_free_dic; + } + } + if (cops->init_decompress_ctx) { ret = cops->init_decompress_ctx(dic); if (ret) @@ -1449,22 +1470,6 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) dic->cpages[i] = page; } - dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) * - dic->cluster_size, GFP_NOFS); - if (!dic->tpages) - goto out_free; - - for (i = 0; i < dic->cluster_size; i++) { - if (cc->rpages[i]) { - dic->tpages[i] = cc->rpages[i]; - continue; - } - - dic->tpages[i] = f2fs_compress_alloc_page(); - if (!dic->tpages[i]) - goto out_free; - } - return dic; out_free: From 1f07cc58bc92bdea261b7cca12eec2f2827edab6 Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Sat, 25 Jul 2020 18:05:27 +0800 Subject: [PATCH 44/52] f2fs: correct comment of f2fs_exist_written_data Function parameter mode could be TRANS_DIR_INO. Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f86c6ba7cb82..8c782d3f324f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -523,7 +523,7 @@ void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) __remove_ino_entry(sbi, ino, type); } -/* mode should be APPEND_INO or UPDATE_INO */ +/* mode should be APPEND_INO, UPDATE_INO or TRANS_DIR_INO */ bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { struct inode_management *im = &sbi->im[mode]; From 1fd280188d1f1e7318264a34aba435f3b69e71e8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 Jul 2020 08:26:11 -0700 Subject: [PATCH 45/52] f2fs: fix deadlock between quota writes and checkpoint f2fs_write_data_pages(quota_mapping) __f2fs_write_data_pages f2fs_write_checkpoint * blk_start_plug(&plug); * add bio in write_io[DATA] - block_operations - skip syncing quota by >DEFAULT_RETRY_QUOTA_FLUSH_COUNT - down_write(&sbi->node_write); - f2fs_write_single_data_page - down_read(node_write) - f2fs_wait_on_all_pages(F2FS_WB_CP_DATA); Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8c782d3f324f..99c8061da55b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1269,6 +1269,8 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) if (type == F2FS_DIRTY_META) f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); + else if (type == F2FS_WB_CP_DATA) + f2fs_submit_merged_write(sbi, DATA); io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait); From 8fa41016f098b5ea5b48ffc6042ecaf0282b809b Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Wed, 29 Jul 2020 09:29:01 +0800 Subject: [PATCH 46/52] f2fs: use macro instead of f2fs verity version Because fsverity_descriptor_location.version is constant, so use macro for better reading. Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/verity.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 865c9fb774fb..9eb0dba851e8 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -29,6 +29,8 @@ #include "f2fs.h" #include "xattr.h" +#define F2FS_VERIFY_VER (1) + static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode) { return round_up(inode->i_size, 65536); @@ -152,7 +154,7 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc, struct inode *inode = file_inode(filp); u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size; struct fsverity_descriptor_location dloc = { - .version = cpu_to_le32(1), + .version = cpu_to_le32(F2FS_VERIFY_VER), .size = cpu_to_le32(desc_size), .pos = cpu_to_le64(desc_pos), }; @@ -199,7 +201,7 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL); if (res < 0 && res != -ERANGE) return res; - if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) { + if (res != sizeof(dloc) || dloc.version != cpu_to_le32(F2FS_VERIFY_VER)) { f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format"); return -EINVAL; } From a86d27dd3dbed4d870c6582b8d4bf1fd0f653914 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 29 Jul 2020 21:21:35 +0800 Subject: [PATCH 47/52] f2fs: compress: add sanity check during compressed cluster read In f2fs_read_multi_pages(), we don't have to check cluster's type again, since overwrite or partial truncation need page lock in cluster which has already been held by reader, so cluster's type is stable, let's change check condition to sanity check. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bc89bc987513..c1b676be67b9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2170,9 +2170,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (ret) goto out; - /* cluster was overwritten as normal cluster */ - if (dn.data_blkaddr != COMPRESS_ADDR) - goto out; + f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; From 1f0b067b6e4927f06f5ffaea8eccdf722e563819 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 29 Jul 2020 21:21:36 +0800 Subject: [PATCH 48/52] f2fs: compress: disable compression mount option if compression is off If CONFIG_F2FS_FS_COMPRESSION is off, don't allow to configure or show compression related mount option. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5e0a3eeb8ca4..cdca2087dba0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -460,9 +460,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) { struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; +#ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; + int ext_cnt; +#endif char *p, *name; - int arg = 0, ext_cnt; + int arg = 0; kuid_t uid; kgid_t gid; int ret; @@ -853,6 +856,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_checkpoint_enable: clear_opt(sbi, DISABLE_CHECKPOINT); break; +#ifdef CONFIG_F2FS_FS_COMPRESSION case Opt_compress_algorithm: if (!f2fs_sb_has_compression(sbi)) { f2fs_err(sbi, "Compression feature if off"); @@ -918,6 +922,13 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).compress_ext_cnt++; kfree(name); break; +#else + case Opt_compress_algorithm: + case Opt_compress_log_size: + case Opt_compress_extension: + f2fs_info(sbi, "compression options not supported"); + break; +#endif default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1608,7 +1619,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) seq_printf(seq, ",fsync_mode=%s", "nobarrier"); +#ifdef CONFIG_F2FS_FS_COMPRESSION f2fs_show_compress_options(seq, sbi->sb); +#endif return 0; } From 567c4bf54a85a14af4ca9be19d7dc327e56a15f9 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 30 Jul 2020 14:09:28 +0900 Subject: [PATCH 49/52] f2fs: make file immutable even if releasing zero compression block When we use F2FS_IOC_RELEASE_COMPRESS_BLOCKS ioctl, if we can't find any compressed blocks in the file even with large file size, the ioctl just ends up without changing the file's status as immutable. It makes the user, who expects that the file is immutable when it returns successfully, confused. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cc7f5670390f..8a422400e824 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3535,14 +3535,14 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (ret) goto out; - if (!F2FS_I(inode)->i_compr_blocks) - goto out; - F2FS_I(inode)->i_flags |= F2FS_IMMUTABLE_FL; f2fs_set_inode_flags(inode); inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, true); + if (!F2FS_I(inode)->i_compr_blocks) + goto out; + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); From 58f7e00ffb45a4115ba701a32467c2aeb944c855 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 31 Jul 2020 02:18:13 -0400 Subject: [PATCH 50/52] f2fs: replace test_and_set/clear_bit() with set/clear_bit() Since set/clear_inode_flag() don't need to return value to show if flag is set, we can just call set/clear_bit() here. Signed-off-by: Yufen Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3a14056c9cd8..16322ea5b463 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2656,7 +2656,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, static inline void set_inode_flag(struct inode *inode, int flag) { - test_and_set_bit(flag, F2FS_I(inode)->flags); + set_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, true); } @@ -2667,7 +2667,7 @@ static inline int is_inode_flag_set(struct inode *inode, int flag) static inline void clear_inode_flag(struct inode *inode, int flag) { - test_and_clear_bit(flag, F2FS_I(inode)->flags); + clear_bit(flag, F2FS_I(inode)->flags); __mark_inode_dirty_flag(inode, flag, false); } From 9feffe14661bb6f5200bc5712999378832d129c5 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Sat, 1 Aug 2020 11:24:50 +0800 Subject: [PATCH 51/52] f2fs: update_sit_entry: Make the judgment condition of f2fs_bug_on more intuitive Current judgment condition of f2fs_bug_on in function update_sit_entry(): new_vblocks >> (sizeof(unsigned short) << 3) || new_vblocks > sbi->blocks_per_seg which equivalents to: new_vblocks < 0 || new_vblocks > sbi->blocks_per_seg The latter is more intuitive. Signed-off-by: Zhihao Cheng Reported-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 40dd29cfc83d..a65d357f89a9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2166,7 +2166,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) new_vblocks = se->valid_blocks + del; offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) || + f2fs_bug_on(sbi, (new_vblocks < 0 || (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; From 828add774f0d2bf930cdeca6c982c1fbcdd846bb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 3 Aug 2020 19:37:12 -0700 Subject: [PATCH 52/52] f2fs: prepare a waiter before entering io_schedule This is to avoid sleep() in the waiter thread. [ 20.157753] ------------[ cut here ]------------ [ 20.158393] do not call blocking ops when !TASK_RUNNING; state=2 set at [<0000000096354225>] prepare_to_wait+0xcd/0x430 [ 20.159858] WARNING: CPU: 1 PID: 1152 at kernel/sched/core.c:7142 __might_sleep+0x149/0x1a0 ... [ 20.176110] __submit_merged_write_cond+0x191/0x310 [ 20.176739] f2fs_submit_merged_write+0x18/0x20 [ 20.177323] f2fs_wait_on_all_pages+0x269/0x2d0 [ 20.177899] ? block_operations+0x980/0x980 [ 20.178441] ? __kasan_check_read+0x11/0x20 [ 20.178975] ? finish_wait+0x260/0x260 [ 20.179488] ? percpu_counter_set+0x147/0x230 [ 20.180049] do_checkpoint+0x1757/0x2a50 [ 20.180558] f2fs_write_checkpoint+0x840/0xaf0 [ 20.181126] f2fs_sync_fs+0x287/0x4a0 Reported-by: Eric Biggers Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 99c8061da55b..ff807e14c891 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1258,8 +1258,6 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - if (!get_pages(sbi, type)) break; @@ -1271,6 +1269,8 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type) FS_CP_META_IO); else if (type == F2FS_WB_CP_DATA) f2fs_submit_merged_write(sbi, DATA); + + prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); io_schedule_timeout(DEFAULT_IO_TIMEOUT); } finish_wait(&sbi->cp_wait, &wait);