From 16e6281b6b22b0178eab95c6a82502d7b10f67b8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Sun, 22 Nov 2020 18:10:24 -0500 Subject: [PATCH 1/6] gfs2: Fix deadlock dumping resource group glocks Commit 0e539ca1bbbe ("gfs2: Fix NULL pointer dereference in gfs2_rgrp_dump") introduced additional locking in gfs2_rgrp_go_dump, which is also used for dumping resource group glocks via debugfs. However, on that code path, the glock spin lock is already taken in dump_glock, and taking it again in gfs2_glock2rgrp leads to deadlock. This can be reproduced with: $ mkfs.gfs2 -O -p lock_nolock /dev/FOO $ mount /dev/FOO /mnt/foo $ touch /mnt/foo/bar $ cat /sys/kernel/debug/gfs2/FOO/glocks Fix that by not taking the glock spin lock inside the go_dump callback. Fixes: 0e539ca1bbbe ("gfs2: Fix NULL pointer dereference in gfs2_rgrp_dump") Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 67f2921ae8d4..6cedeefb7b3f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -245,7 +245,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf) { - struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); + struct gfs2_rgrpd *rgd = gl->gl_object; if (rgd) gfs2_rgrp_dump(seq, rgd, fs_id_buf); From 515b269d5bd29a986d5e1c0a0cba87fa865a48b4 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 23 Nov 2020 10:53:35 -0500 Subject: [PATCH 2/6] gfs2: set lockdep subclass for iopen glocks This patch introduce a new globs attribute to define the subclass of the glock lockref spinlock. This avoid the following lockdep warning, which occurs when we lock an inode lock while an iopen lock is held: ============================================ WARNING: possible recursive locking detected 5.10.0-rc3+ #4990 Not tainted -------------------------------------------- kworker/0:1/12 is trying to acquire lock: ffff9067d45672d8 (&gl->gl_lockref.lock){+.+.}-{3:3}, at: lockref_get+0x9/0x20 but task is already holding lock: ffff9067da308588 (&gl->gl_lockref.lock){+.+.}-{3:3}, at: delete_work_func+0x164/0x260 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&gl->gl_lockref.lock); lock(&gl->gl_lockref.lock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/0:1/12: #0: ffff9067c1bfdd38 ((wq_completion)delete_workqueue){+.+.}-{0:0}, at: process_one_work+0x1b7/0x540 #1: ffffac594006be70 ((work_completion)(&(&gl->gl_delete)->work)){+.+.}-{0:0}, at: process_one_work+0x1b7/0x540 #2: ffff9067da308588 (&gl->gl_lockref.lock){+.+.}-{3:3}, at: delete_work_func+0x164/0x260 stack backtrace: CPU: 0 PID: 12 Comm: kworker/0:1 Not tainted 5.10.0-rc3+ #4990 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Workqueue: delete_workqueue delete_work_func Call Trace: dump_stack+0x8b/0xb0 __lock_acquire.cold+0x19e/0x2e3 lock_acquire+0x150/0x410 ? lockref_get+0x9/0x20 _raw_spin_lock+0x27/0x40 ? lockref_get+0x9/0x20 lockref_get+0x9/0x20 delete_work_func+0x188/0x260 process_one_work+0x237/0x540 worker_thread+0x4d/0x3b0 ? process_one_work+0x540/0x540 kthread+0x127/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Suggested-by: Andreas Gruenbacher Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 1 + fs/gfs2/glops.c | 1 + fs/gfs2/incore.h | 1 + 3 files changed, 3 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d98a2e5dab9f..35a6fd103761 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1035,6 +1035,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_node.next = NULL; gl->gl_flags = 0; gl->gl_name = name; + lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6cedeefb7b3f..e2cfc00ab936 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -781,6 +781,7 @@ const struct gfs2_glock_operations gfs2_iopen_glops = { .go_callback = iopen_go_callback, .go_demote_ok = iopen_go_demote_ok, .go_flags = GLOF_LRU | GLOF_NONDISK, + .go_subclass = 1, }; const struct gfs2_glock_operations gfs2_flock_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d7707307f4b1..f8858d995b24 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -247,6 +247,7 @@ struct gfs2_glock_operations { const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); void (*go_free)(struct gfs2_glock *gl); + const int go_subclass; const int go_type; const unsigned long go_flags; #define GLOF_ASPACE 1 /* address space attached */ From 778721510e84209f78e31e2ccb296ae36d623f5e Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 24 Nov 2020 10:44:36 -0500 Subject: [PATCH 3/6] gfs2: check for empty rgrp tree in gfs2_ri_update If gfs2 tries to mount a (corrupt) file system that has no resource groups it still tries to set preferences on the first one, which causes a kernel null pointer dereference. This patch adds a check to function gfs2_ri_update so this condition is detected and reported back as an error. Reported-by: syzbot+e3f23ce40269a4c9053a@syzkaller.appspotmail.com Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f7addc6197ed..5e8eef9990e3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -985,6 +985,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) if (error < 0) return error; + if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) { + fs_err(sdp, "no resource groups found in the file system.\n"); + return -ENOENT; + } set_rgrp_preferences(sdp); sdp->sd_rindex_uptodate = 1; From f39e7d3aae2934b1cfdd209b54c508e2552e9531 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 24 Nov 2020 10:41:40 -0600 Subject: [PATCH 4/6] gfs2: Don't freeze the file system during unmount GFS2's freeze/thaw mechanism uses a special freeze glock to control its operation. It does this with a sync glock operation (glops.c) called freeze_go_sync. When the freeze glock is demoted (glock's do_xmote) the glops function causes the file system to be frozen. This is intended. However, GFS2's mount and unmount processes also hold the freeze glock to prevent other processes, perhaps on different cluster nodes, from mounting the frozen file system in read-write mode. Before this patch, there was no check in freeze_go_sync for whether a freeze in intended or whether the glock demote was caused by a normal unmount. So it was trying to freeze the file system it's trying to unmount, which ends up in a deadlock. This patch adds an additional check to freeze_go_sync so that demotes of the freeze glock are ignored if they come from the unmount process. Fixes: 20b329129009 ("gfs2: Fix regression in freeze_go_sync") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index e2cfc00ab936..3faa421568b0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -582,7 +582,8 @@ static int freeze_go_sync(struct gfs2_glock *gl) * Once thawed, the work func acquires the freeze glock in * SH and everybody goes back to thawed. */ - if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp)) { + if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && + !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { From 82e938bd5382b322ce81e6cb8fd030987f2da022 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 25 Nov 2020 23:37:18 +0100 Subject: [PATCH 5/6] gfs2: Upgrade shared glocks for atime updates Commit 20f829999c38 ("gfs2: Rework read and page fault locking") lifted the glock lock taking from the low-level ->readpage and ->readahead address space operations to the higher-level ->read_iter file and ->fault vm operations. The glocks are still taken in LM_ST_SHARED mode only. On filesystems mounted without the noatime option, ->read_iter sometimes needs to update the atime as well, though. Right now, this leads to a failed locking mode assertion in gfs2_dirty_inode. Fix that by introducing a new update_time inode operation. There, if the glock is held non-exclusively, upgrade it to an exclusive lock. Reported-by: Alexander Aring Fixes: 20f829999c38 ("gfs2: Rework read and page fault locking") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 077ccb1b3ccc..eed1a1bac6f6 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -2116,6 +2116,25 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset) return vfs_setpos(file, ret, inode->i_sb->s_maxbytes); } +static int gfs2_update_time(struct inode *inode, struct timespec64 *time, + int flags) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_gl; + struct gfs2_holder *gh; + int error; + + gh = gfs2_glock_is_locked_by_me(gl); + if (gh && !gfs2_glock_is_held_excl(gl)) { + gfs2_glock_dq(gh); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh); + error = gfs2_glock_nq(gh); + if (error) + return error; + } + return generic_update_time(inode, time, flags); +} + const struct inode_operations gfs2_file_iops = { .permission = gfs2_permission, .setattr = gfs2_setattr, @@ -2124,6 +2143,7 @@ const struct inode_operations gfs2_file_iops = { .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, .set_acl = gfs2_set_acl, + .update_time = gfs2_update_time, }; const struct inode_operations gfs2_dir_iops = { @@ -2143,6 +2163,7 @@ const struct inode_operations gfs2_dir_iops = { .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, .set_acl = gfs2_set_acl, + .update_time = gfs2_update_time, .atomic_open = gfs2_atomic_open, }; From dd0ecf544125639e54056d851e4887dbb94b6d2f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 30 Nov 2020 16:07:25 +0100 Subject: [PATCH 6/6] gfs2: Fix deadlock between gfs2_{create_inode,inode_lookup} and delete_work_func In gfs2_create_inode and gfs2_inode_lookup, make sure to cancel any pending delete work before taking the inode glock. Otherwise, gfs2_cancel_delete_work may block waiting for delete_work_func to complete, and delete_work_func may block trying to acquire the inode glock in gfs2_inode_lookup. Reported-by: Alexander Aring Fixes: a0e3cc65fa29 ("gfs2: Turn gl_delete into a delayed work") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eed1a1bac6f6..65ae4fc28ede 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -150,6 +150,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (unlikely(error)) goto fail; + if (blktype != GFS2_BLKST_UNLINKED) + gfs2_cancel_delete_work(io_gl); if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) { /* @@ -180,8 +182,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail; - if (blktype != GFS2_BLKST_UNLINKED) - gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl); glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_glock_put(io_gl); io_gl = NULL; @@ -725,13 +725,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, flush_delayed_work(&ip->i_gl->gl_work); glock_set_object(ip->i_gl, ip); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) goto fail_free_inode; + gfs2_cancel_delete_work(io_gl); + glock_set_object(io_gl, ip); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_gunlock2; error = gfs2_trans_begin(sdp, blocks, 0); if (error) - goto fail_free_inode; + goto fail_gunlock2; if (blocks > 1) { ip->i_eattr = ip->i_no_addr + 1; @@ -740,18 +746,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, init_dinode(dip, ip, symname); gfs2_trans_end(sdp); - error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (error) - goto fail_free_inode; - BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags)); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (error) goto fail_gunlock2; - gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl); - glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_set_iop(inode); insert_inode_hash(inode); @@ -803,6 +803,7 @@ fail_gunlock3: gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); + glock_clear_object(io_gl, ip); gfs2_glock_put(io_gl); fail_free_inode: if (ip->i_gl) {