From 0a944e8a6c66ca04c7afbaa17e22bf208a8b37f0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 22 May 2019 10:27:01 -0400 Subject: [PATCH 1/4] ext4: don't perform block validity checks on the journal inode Since the journal inode is already checked when we added it to the block validity's system zone, if we check it again, we'll just trigger a failure. This was causing failures like this: [ 53.897001] EXT4-fs error (device sda): ext4_find_extent:909: inode #8: comm jbd2/sda-8: pblk 121667583 bad header/extent: invalid extent entries - magic f30a, entries 8, max 340(340), depth 0(0) [ 53.931430] jbd2_journal_bmap: journal block not found at offset 49 on sda-8 [ 53.938480] Aborting journal on device sda-8. ... but only if the system was under enough memory pressure that logical->physical mapping for the journal inode gets pushed out of the extent cache. (This is why it wasn't noticed earlier.) Fixes: 345c0dbf3a30 ("ext4: protect journal inode's blocks using block_validity") Reported-by: Dan Rue Signed-off-by: Theodore Ts'o Tested-by: Naresh Kamboju --- fs/ext4/extents.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f2c62e2a0c98..d40ed940001e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -518,10 +518,14 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; + if (!ext4_has_feature_journal(inode->i_sb) || + (inode->i_ino != + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; + } set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries From 82a25b027ca48d7ef197295846b352345853dfa8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 May 2019 23:07:08 -0400 Subject: [PATCH 2/4] ext4: wait for outstanding dio during truncate in nojournal mode We didn't wait for outstanding direct IO during truncate in nojournal mode (as we skip orphan handling in that case). This can lead to fs corruption or stale data exposure if truncate ends up freeing blocks and these get reallocated before direct IO finishes. Fix the condition determining whether the wait is necessary. CC: stable@vger.kernel.org Fixes: 1c9114f9c0f1 ("ext4: serialize unlocked dio reads with truncate") Reviewed-by: Ira Weiny Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 82298c63ea6d..9bcb7f2b86dd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5630,20 +5630,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) goto err_out; } } - if (!shrink) + if (!shrink) { pagecache_isize_extended(inode, oldsize, inode->i_size); - - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. Temporarily disable - * dioread_nolock to prevent livelock. - */ - if (orphan) { - if (!ext4_should_journal_data(inode)) { - inode_dio_wait(inode); - } else - ext4_wait_for_tail_page_commit(inode); + } else { + /* + * Blocks are going to be removed from the inode. Wait + * for dio in flight. + */ + inode_dio_wait(inode); } + if (orphan && ext4_should_journal_data(inode)) + ext4_wait_for_tail_page_commit(inode); down_write(&EXT4_I(inode)->i_mmap_sem); rc = ext4_break_layouts(inode); From ee0ed02ca93ef1ecf8963ad96638795d55af2c14 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 May 2019 23:35:28 -0400 Subject: [PATCH 3/4] ext4: do not delete unlinked inode from orphan list on failed truncate It is possible that unlinked inode enters ext4_setattr() (e.g. if somebody calls ftruncate(2) on unlinked but still open file). In such case we should not delete the inode from the orphan list if truncate fails. Note that this is mostly a theoretical concern as filesystem is corrupted if we reach this path anyway but let's be consistent in our orphan handling. Reviewed-by: Ira Weiny Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9bcb7f2b86dd..c7f77c643008 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5625,7 +5625,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) { - if (orphan) + if (orphan && inode->i_nlink) ext4_orphan_del(NULL, inode); goto err_out; } From 66883da1eee8ad4b38eeff7fa1c86a097d9670fc Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 24 May 2019 23:48:23 -0400 Subject: [PATCH 4/4] ext4: fix dcache lookup of !casefolded directories Found by visual inspection, this wasn't caught by my xfstest, since it's effect is ignoring positive dentries in the cache the fallback just goes to the disk. it was introduced in the last iteration of the case-insensitive patch. d_compare should return 0 when the entries match, so make sure we are correctly comparing the entire string if the encoding feature is set and we are on a case-INsensitive directory. Fixes: b886ee3e778e ("ext4: Support case-insensitive file name lookups") Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Theodore Ts'o --- fs/ext4/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 884a6e776809..c7843b149a1e 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -671,7 +671,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { if (len != name->len) return -1; - return !memcmp(str, name, len); + return memcmp(str, name->name, len); } return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);