From 5deaf1f63bafcec48e70e70669c567072c574f69 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 9 Jun 2020 13:29:08 -0400 Subject: [PATCH 1/8] gfs2: Add some flags missing from glock output Before this patch, three flags were not represented in the glock output. This patch adds them in: c - GLF_INODE_CREATING P - GLF_PENDING_DELETE x - GLF_FREEING (both f and F are already used) Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8545024a1401..57134d326cfa 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2106,6 +2106,12 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'o'; if (test_bit(GLF_BLOCKING, gflags)) *p++ = 'b'; + if (test_bit(GLF_INODE_CREATING, gflags)) + *p++ = 'c'; + if (test_bit(GLF_PENDING_DELETE, gflags)) + *p++ = 'P'; + if (test_bit(GLF_FREEING, gflags)) + *p++ = 'x'; *p = 0; return buf; } From 4c5c3010408d872359134cfa209619778d8f15e6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 29 Jun 2020 17:47:15 -0500 Subject: [PATCH 2/8] gfs2: Pass glock holder to gfs2_file_direct_{read,write} Pass a pointer to the existing glock holder from gfs2_file_{read,write}_iter to gfs2_file_direct_{read,write} to save some stack space. Suggested-by: Matthew Wilcox Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index bebde537ac8c..847adb97380d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -781,39 +781,39 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } -static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to) +static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, + struct gfs2_holder *gh) { struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); size_t count = iov_iter_count(to); - struct gfs2_holder gh; ssize_t ret; if (!count) return 0; /* skip atime */ - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); - ret = gfs2_glock_nq(&gh); + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); + ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, is_sync_kiocb(iocb)); - gfs2_glock_dq(&gh); + gfs2_glock_dq(gh); out_uninit: - gfs2_holder_uninit(&gh); + gfs2_holder_uninit(gh); return ret; } -static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) +static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, + struct gfs2_holder *gh) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct gfs2_inode *ip = GFS2_I(inode); size_t len = iov_iter_count(from); loff_t offset = iocb->ki_pos; - struct gfs2_holder gh; ssize_t ret; /* @@ -824,8 +824,8 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) * unfortunately, have the option of only flushing a range like the * VFS does. */ - gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); - ret = gfs2_glock_nq(&gh); + gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); + ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; @@ -837,9 +837,9 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) is_sync_kiocb(iocb)); out: - gfs2_glock_dq(&gh); + gfs2_glock_dq(gh); out_uninit: - gfs2_holder_uninit(&gh); + gfs2_holder_uninit(gh); return ret; } @@ -851,7 +851,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ssize_t ret; if (iocb->ki_flags & IOCB_DIRECT) { - ret = gfs2_file_direct_read(iocb, to); + ret = gfs2_file_direct_read(iocb, to, &gh); if (likely(ret != -ENOTBLK)) return ret; iocb->ki_flags &= ~IOCB_DIRECT; @@ -900,13 +900,12 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; ssize_t ret; gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); if (iocb->ki_flags & IOCB_APPEND) { - struct gfs2_holder gh; - ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) return ret; @@ -930,7 +929,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct address_space *mapping = file->f_mapping; ssize_t buffered, ret2; - ret = gfs2_file_direct_write(iocb, from); + ret = gfs2_file_direct_write(iocb, from, &gh); if (ret < 0 || !iov_iter_count(from)) goto out_unlock; From c07bfb4d8fa1ee11c6d18b093d0bb6c8832d3626 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 27 Jul 2020 19:18:57 +0200 Subject: [PATCH 3/8] gfs2: Fix refcount leak in gfs2_glock_poke In gfs2_glock_poke, make sure gfs2_holder_uninit is called on the local glock holder. Without that, we're leaking a glock and a pid reference. Fixes: 9e8990dea926 ("gfs2: Smarter iopen glock waiting") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 57134d326cfa..f13b136654ca 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -790,9 +790,11 @@ static void gfs2_glock_poke(struct gfs2_glock *gl) struct gfs2_holder gh; int error; - error = gfs2_glock_nq_init(gl, LM_ST_SHARED, flags, &gh); + gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh); + error = gfs2_glock_nq(&gh); if (!error) gfs2_glock_dq(&gh); + gfs2_holder_uninit(&gh); } static bool gfs2_try_evict(struct gfs2_glock *gl) From c9dff08485a6c11449307abde1d7bb404fcee481 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 9 Jul 2020 14:49:30 +0200 Subject: [PATCH 4/8] fs: Fix typo in comment The comment for function filemap_check_wb_err accidentally refers to it as filemap_check_wb_error. Signed-off-by: Andreas Gruenbacher --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index f5abba86107d..f9ae45795c1a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2798,7 +2798,7 @@ static inline void filemap_set_wb_err(struct address_space *mapping, int err) } /** - * filemap_check_wb_error - has an error occurred since the mark was sampled? + * filemap_check_wb_err - has an error occurred since the mark was sampled? * @mapping: mapping to check for writeback errors * @since: previously-sampled errseq_t * From b57bc0fb2fe403b687142d12fba4ad085f8be6ee Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 22 Jul 2020 10:19:52 -0500 Subject: [PATCH 5/8] gfs2: Fix inaccurate comment The comment regarding journal flush thresholds is wrong. This patch fixes it. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index a76e55bc28eb..a58333e3980d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1092,7 +1092,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) * or the total number of used blocks (pinned blocks plus AIL blocks) * is greater than thresh2. * - * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of + * At mount time thresh1 is 2/5ths of journal size, thresh2 is 4/5ths of * journal size. * * Returns: errno From b0be23b23f6cdeb61d154fef72cd82f8f99f9ca4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 23 Jul 2020 13:14:07 -0500 Subject: [PATCH 6/8] gfs2: print details on transactions that aren't properly ended If function gfs2_trans_begin is called with another transaction active it BUGs out, but it doesn't give any details about the duplicate. This patch moves function gfs2_print_trans and calls it when this situation arises for better debugging. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/trans.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index a3dfa3aa87ad..e1c7eb6eb00a 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -25,13 +25,28 @@ #include "util.h" #include "trace_gfs2.h" +static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) +{ + fs_warn(sdp, "Transaction created at: %pSR\n", (void *)tr->tr_ip); + fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n", + tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, + test_bit(TR_TOUCHED, &tr->tr_flags)); + fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + tr->tr_num_buf_new, tr->tr_num_buf_rm, + tr->tr_num_databuf_new, tr->tr_num_databuf_rm, + tr->tr_num_revoke, tr->tr_num_revoke_rm); +} + int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes) { struct gfs2_trans *tr; int error; - BUG_ON(current->journal_info); + if (current->journal_info) { + gfs2_print_trans(sdp, current->journal_info); + BUG(); + } BUG_ON(blocks == 0 && revokes == 0); if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) @@ -72,18 +87,6 @@ fail: return error; } -static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) -{ - fs_warn(sdp, "Transaction created at: %pSR\n", (void *)tr->tr_ip); - fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n", - tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, - test_bit(TR_TOUCHED, &tr->tr_flags)); - fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n", - tr->tr_num_buf_new, tr->tr_num_buf_rm, - tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); -} - void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; From 70499cdfeb3625c87eebe4f7a7ea06fa7447e5df Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 24 Jul 2020 12:06:31 -0500 Subject: [PATCH 7/8] gfs2: Never call gfs2_block_zero_range with an open transaction Before this patch, some functions started transactions then they called gfs2_block_zero_range. However, gfs2_block_zero_range, like writes, can start transactions, which results in a recursive transaction error. For example: do_shrink trunc_start gfs2_trans_begin <------------------------------------------------ gfs2_block_zero_range iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops); iomap_apply ... iomap_zero_range_actor iomap_begin gfs2_iomap_begin gfs2_iomap_begin_write actor (iomap_zero_range_actor) iomap_zero iomap_write_begin gfs2_iomap_page_prepare gfs2_trans_begin <------------------------ This patch reorders the callers of gfs2_block_zero_range so that they only start their transactions after the call. It also adds a BUG_ON to ensure this doesn't happen again. Fixes: 2257e468a63b ("gfs2: implement gfs2_block_zero_range using iomap_zero_range") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 69 ++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6306eaae378b..6d2ea788d0a1 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1351,9 +1351,15 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi return ret; } +/* + * NOTE: Never call gfs2_block_zero_range with an open transaction because it + * uses iomap write to perform its actions, which begin their own transactions + * (iomap_begin, page_prepare, etc.) + */ static int gfs2_block_zero_range(struct inode *inode, loff_t from, unsigned int length) { + BUG_ON(current->journal_info); return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops); } @@ -1414,6 +1420,16 @@ static int trunc_start(struct inode *inode, u64 newsize) u64 oldsize = inode->i_size; int error; + if (!gfs2_is_stuffed(ip)) { + unsigned int blocksize = i_blocksize(inode); + unsigned int offs = newsize & (blocksize - 1); + if (offs) { + error = gfs2_block_zero_range(inode, newsize, + blocksize - offs); + if (error) + return error; + } + } if (journaled) error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); else @@ -1427,19 +1443,10 @@ static int trunc_start(struct inode *inode, u64 newsize) gfs2_trans_add_meta(ip->i_gl, dibh); - if (gfs2_is_stuffed(ip)) { + if (gfs2_is_stuffed(ip)) gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); - } else { - unsigned int blocksize = i_blocksize(inode); - unsigned int offs = newsize & (blocksize - 1); - if (offs) { - error = gfs2_block_zero_range(inode, newsize, - blocksize - offs); - if (error) - goto out; - } + else ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; - } i_size_write(inode, newsize); ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); @@ -2448,25 +2455,7 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) loff_t start, end; int error; - start = round_down(offset, blocksize); - end = round_up(offset + length, blocksize) - 1; - error = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (error) - return error; - - if (gfs2_is_jdata(ip)) - error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA, - GFS2_JTRUNC_REVOKES); - else - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error) - return error; - - if (gfs2_is_stuffed(ip)) { - error = stuffed_zero_range(inode, offset, length); - if (error) - goto out; - } else { + if (!gfs2_is_stuffed(ip)) { unsigned int start_off, end_len; start_off = offset & (blocksize - 1); @@ -2489,6 +2478,26 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) } } + start = round_down(offset, blocksize); + end = round_up(offset + length, blocksize) - 1; + error = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (error) + return error; + + if (gfs2_is_jdata(ip)) + error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA, + GFS2_JTRUNC_REVOKES); + else + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + + if (gfs2_is_stuffed(ip)) { + error = stuffed_zero_range(inode, offset, length); + if (error) + goto out; + } + if (gfs2_is_jdata(ip)) { BUG_ON(!current->journal_info); gfs2_journaled_truncate_range(inode, offset, length); From e28c02b94f9e039beeb5c75198caf6e17b66c520 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 30 Jul 2020 12:31:38 -0500 Subject: [PATCH 8/8] gfs2: When gfs2_dirty_inode gets a glock error, dump the glock Before this patch, if function gfs2_dirty_inode got an error when trying to lock the inode glock, it complained, but it didn't say what glock or inode had the problem. In this case, it almost always means that dinode_in found an error with the dinode in the file system. So it makes sense to dump the glock, which tells us the location of the dinode in the file system. That will allow us to analyze the corruption from the metadata. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 47d0ae158b69..9f4d9e7be839 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -566,6 +566,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) { fs_err(sdp, "dirty_inode: glock %d\n", ret); + gfs2_dump_glock(NULL, ip->i_gl, true); return; } need_unlock = 1;