From ba6379f7e6c7e51b3c0e92672bc61bb6961c2b5e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Nov 2016 12:53:00 +0100 Subject: [PATCH 01/10] fs: Provide function to get superblock with exclusive s_umount Quota code will need a variant of get_super_thawed() that returns superblock with s_umount held in exclusive mode to serialize quota on and quota off operations. Provide this functionality. Signed-off-by: Jan Kara --- fs/super.c | 80 ++++++++++++++++++++++++++++++++++------------ include/linux/fs.h | 2 ++ 2 files changed, 62 insertions(+), 20 deletions(-) diff --git a/fs/super.c b/fs/super.c index c183835566c1..f7f724230e2b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -558,6 +558,13 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); +void drop_super_exclusive(struct super_block *sb) +{ + up_write(&sb->s_umount); + put_super(sb); +} +EXPORT_SYMBOL(drop_super_exclusive); + /** * iterate_supers - call function for all active superblocks * @f: function to call @@ -628,15 +635,7 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); -/** - * get_super - get the superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device given. %NULL is returned if no match is found. - */ - -struct super_block *get_super(struct block_device *bdev) +static struct super_block *__get_super(struct block_device *bdev, bool excl) { struct super_block *sb; @@ -651,11 +650,17 @@ rescan: if (sb->s_bdev == bdev) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + if (!excl) + down_read(&sb->s_umount); + else + down_write(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & MS_BORN)) return sb; - up_read(&sb->s_umount); + if (!excl) + up_read(&sb->s_umount); + else + up_write(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); @@ -666,8 +671,36 @@ rescan: return NULL; } +/** + * get_super - get the superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. %NULL is returned if no match is found. + */ +struct super_block *get_super(struct block_device *bdev) +{ + return __get_super(bdev, false); +} EXPORT_SYMBOL(get_super); +static struct super_block *__get_super_thawed(struct block_device *bdev, + bool excl) +{ + while (1) { + struct super_block *s = __get_super(bdev, excl); + if (!s || s->s_writers.frozen == SB_UNFROZEN) + return s; + if (!excl) + up_read(&s->s_umount); + else + up_write(&s->s_umount); + wait_event(s->s_writers.wait_unfrozen, + s->s_writers.frozen == SB_UNFROZEN); + put_super(s); + } +} + /** * get_super_thawed - get thawed superblock of a device * @bdev: device to get the superblock for @@ -679,18 +712,25 @@ EXPORT_SYMBOL(get_super); */ struct super_block *get_super_thawed(struct block_device *bdev) { - while (1) { - struct super_block *s = get_super(bdev); - if (!s || s->s_writers.frozen == SB_UNFROZEN) - return s; - up_read(&s->s_umount); - wait_event(s->s_writers.wait_unfrozen, - s->s_writers.frozen == SB_UNFROZEN); - put_super(s); - } + return __get_super_thawed(bdev, false); } EXPORT_SYMBOL(get_super_thawed); +/** + * get_super_exclusive_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. 
The superblock is returned once it is thawed + * (or immediately if it was not frozen) and s_umount semaphore is held + * in exclusive mode. %NULL is returned if no match is found. + */ +struct super_block *get_super_exclusive_thawed(struct block_device *bdev) +{ + return __get_super_thawed(bdev, true); +} +EXPORT_SYMBOL(get_super_exclusive_thawed); + /** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for diff --git a/include/linux/fs.h b/include/linux/fs.h index dc0478c07b2a..d04cfdefcd11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2949,8 +2949,10 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); extern struct super_block *get_super_thawed(struct block_device *); +extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); +extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); From 7d6cd73d33b62021111a469b6a454ec357be295f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Nov 2016 13:16:10 +0100 Subject: [PATCH 02/10] quota: Hold s_umount in exclusive mode when enabling / disabling quotas Currently we hold s_umount semaphore only in shared mode when enabling or disabling quotas and use dqonoff_mutex for serializing quota state changes on a filesystem and also quota state changes with other places depending on current quota state. Using dedicated mutex for this causes possible deadlocks during filesystem freezing (see following commit for details) so we transition to using s_umount semaphore for the necessary synchronization whose lock ordering is properly handled by the filesystem freezing code. As a start grab s_umount in exclusive mode when enabling / disabling quotas. Signed-off-by: Jan Kara --- fs/quota/dquot.c | 11 +++++++++++ fs/quota/quota.c | 16 +++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 1bfac28b7e7d..047afb966420 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2107,6 +2107,10 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) struct quota_info *dqopt = sb_dqopt(sb); struct inode *toputinode[MAXQUOTAS]; + /* s_umount should be held in exclusive mode */ + if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) + up_read(&sb->s_umount); + /* Cannot turn off usage accounting without turning off limits, or * suspend quotas and simultaneously turn quotas off. */ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED)) @@ -2371,6 +2375,10 @@ int dquot_resume(struct super_block *sb, int type) int ret = 0, cnt; unsigned int flags; + /* s_umount should be held in exclusive mode */ + if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount))) + up_read(&sb->s_umount); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -2430,6 +2438,9 @@ int dquot_enable(struct inode *inode, int type, int format_id, /* Just unsuspend quotas? 
*/
 	BUG_ON(flags & DQUOT_SUSPENDED);
+	/* s_umount should be held in exclusive mode */
+	if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
+		up_read(&sb->s_umount);
 
 	if (!flags)
 		return 0;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2d445425aad7..6ce6f4b6826b 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -789,9 +789,14 @@ static int quotactl_cmd_write(int cmd)
 	}
 	return 1;
 }
-
 #endif /* CONFIG_BLOCK */
 
+/* Return true if quotactl command is manipulating quota on/off state */
+static bool quotactl_cmd_onoff(int cmd)
+{
+	return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF);
+}
+
 /*
  * look up a superblock on which quota ops will be performed
  * - use the name of a block device to find the superblock thereon
@@ -809,7 +814,9 @@ static struct super_block *quotactl_block(const char __user *special, int cmd)
 	putname(tmp);
 	if (IS_ERR(bdev))
 		return ERR_CAST(bdev);
-	if (quotactl_cmd_write(cmd))
+	if (quotactl_cmd_onoff(cmd))
+		sb = get_super_exclusive_thawed(bdev);
+	else if (quotactl_cmd_write(cmd))
 		sb = get_super_thawed(bdev);
 	else
 		sb = get_super(bdev);
@@ -872,7 +879,10 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
 
 	ret = do_quotactl(sb, type, cmds, id, addr, pathp);
 
-	drop_super(sb);
+	if (!quotactl_cmd_onoff(cmds))
+		drop_super(sb);
+	else
+		drop_super_exclusive(sb);
 out:
 	if (pathp && !IS_ERR(pathp))
 		path_put(pathp);

From 9d1ccbe70e0b14545caad12dc73adb3605447df0 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 23 Nov 2016 13:35:14 +0100
Subject: [PATCH 03/10] quota: Use s_umount protection for quota operations

Quota writeback is protected by the s_umount semaphore held for reading,
because every writeback is protected by that lock (grabbed either by the
generic writeback code or by the quotactl handler). Getting the next
available ID in a quota file, querying quota state, setting quota
information, and getting the quota format are all quotactl operations
protected by the s_umount semaphore held for reading, grabbed in the
quotactl handler.

This also fixes a lockdep splat about a possible deadlock during
filesystem freezing: sync_filesystem() is called with page faults already
blocked, but sync_filesystem() calls into dquot_writeback_dquots(), which
grabs dqonoff_mutex, which ranks above i_mutex (vfs_load_quota_inode()
grabs i_mutex under dqonoff_mutex), which clearly ranks below page fault
freeze protection (e.g. via mmap_sem dependencies). The reported problem
is not a real deadlock possibility, since during quota-on we check that
filesystem freezing is not in progress, but it is still good to have this
fixed.
Reported-by: Ted Tso Reported-by: Eric Whitney Signed-off-by: Jan Kara --- fs/quota/dquot.c | 39 ++++++++++----------------------------- fs/quota/quota.c | 6 +----- 2 files changed, 11 insertions(+), 34 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 047afb966420..2a9dc3fb491c 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -617,7 +617,8 @@ int dquot_writeback_dquots(struct super_block *sb, int type) int cnt; int err, ret = 0; - mutex_lock(&dqopt->dqonoff_mutex); + WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -653,7 +654,6 @@ int dquot_writeback_dquots(struct super_block *sb, int type) && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); dqstats_inc(DQST_SYNCS); - mutex_unlock(&dqopt->dqonoff_mutex); return ret; } @@ -683,7 +683,6 @@ int dquot_quota_sync(struct super_block *sb, int type) * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ - mutex_lock(&dqopt->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -693,7 +692,6 @@ int dquot_quota_sync(struct super_block *sb, int type) truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } - mutex_unlock(&dqopt->dqonoff_mutex); return 0; } @@ -2050,21 +2048,13 @@ int dquot_get_next_id(struct super_block *sb, struct kqid *qid) struct quota_info *dqopt = sb_dqopt(sb); int err; - mutex_lock(&dqopt->dqonoff_mutex); - if (!sb_has_quota_active(sb, qid->type)) { - err = -ESRCH; - goto out; - } - if (!dqopt->ops[qid->type]->get_next_id) { - err = -ENOSYS; - goto out; - } + if (!sb_has_quota_active(sb, qid->type)) + return -ESRCH; + if (!dqopt->ops[qid->type]->get_next_id) + return -ENOSYS; mutex_lock(&dqopt->dqio_mutex); err = dqopt->ops[qid->type]->get_next_id(sb, qid); mutex_unlock(&dqopt->dqio_mutex); -out: - mutex_unlock(&dqopt->dqonoff_mutex); - return err; } EXPORT_SYMBOL(dquot_get_next_id); @@ -2762,7 +2752,6 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state) struct quota_info *dqopt = sb_dqopt(sb); int type; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); memset(state, 0, sizeof(*state)); for (type = 0; type < MAXQUOTAS; type++) { if (!sb_has_quota_active(sb, type)) @@ -2784,7 +2773,6 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state) tstate->nextents = 1; /* We don't know... 
*/
 		spin_unlock(&dq_data_lock);
 	}
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return 0;
 }
 EXPORT_SYMBOL(dquot_get_state);
@@ -2798,18 +2786,13 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
 	if ((ii->i_fieldmask & QC_WARNS_MASK) ||
 	    (ii->i_fieldmask & QC_RT_SPC_TIMER))
 		return -EINVAL;
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_active(sb, type)) {
-		err = -ESRCH;
-		goto out;
-	}
+	if (!sb_has_quota_active(sb, type))
+		return -ESRCH;
 	mi = sb_dqopt(sb)->info + type;
 	if (ii->i_fieldmask & QC_FLAGS) {
 		if ((ii->i_flags & QCI_ROOT_SQUASH &&
-		     mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) {
-			err = -EINVAL;
-			goto out;
-		}
+		     mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD))
+			return -EINVAL;
 	}
 	spin_lock(&dq_data_lock);
 	if (ii->i_fieldmask & QC_SPC_TIMER)
@@ -2826,8 +2809,6 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
 	sb->dq_op->write_info(sb, type);
-out:
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return err;
 }
 EXPORT_SYMBOL(dquot_set_dqinfo);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 6ce6f4b6826b..413c36cca462 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -104,13 +104,9 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
 {
 	__u32 fmt;
 
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_active(sb, type)) {
-		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+	if (!sb_has_quota_active(sb, type))
 		return -ESRCH;
-	}
 	fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	if (copy_to_user(addr, &fmt, sizeof(fmt)))
 		return -EFAULT;
 	return 0;

From 2a64f80ee3ab47dc0114e077ea27903c97882138 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 23 Nov 2016 13:47:57 +0100
Subject: [PATCH 04/10] ocfs2: Protect periodic quota syncing with s_umount semaphore

New quota locking rules will require the s_umount semaphore for all quota
scanning functions. Add it for periodic quota syncing.

Tested-by: Eric Ren
Signed-off-by: Jan Kara
---
 fs/ocfs2/quota_global.c | 10 +++++++++-
 fs/ocfs2/super.c        |  1 -
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 87e577a49b0d..cec495a921e3 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -634,7 +634,15 @@ static void qsync_work_fn(struct work_struct *work)
 					      dqi_sync_work.work);
 	struct super_block *sb = oinfo->dqi_gqinode->i_sb;
 
-	dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
+	/*
+	 * We have to be careful here not to deadlock on s_umount as umount
+	 * disabling quotas may be in progress and it waits for this work to
+	 * complete. If trylock fails, we'll do the sync next time...
+ */ + if (down_read_trylock(&sb->s_umount)) { + dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); + up_read(&sb->s_umount); + } schedule_delayed_work(&oinfo->dqi_sync_work, msecs_to_jiffies(oinfo->dqi_syncms)); } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f56fe39fab04..5b9c0dfdb541 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -985,7 +985,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (!sb_has_quota_loaded(sb, type)) continue; - /* Cancel periodic syncing before we grab dqonoff_mutex */ oinfo = sb_dqinfo(sb, type)->dqi_priv; cancel_delayed_work_sync(&oinfo->dqi_sync_work); inode = igrab(sb->s_dquot.files[type]); From ee1ac541a2446b13b2525929e719e84b63d5e5da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Nov 2016 13:52:19 +0100 Subject: [PATCH 05/10] quota: Remove dqonoff_mutex from dquot_scan_active() All callers of dquot_scan_active() now hold s_umount so we can rely on that lock to protect us against quota state changes. Signed-off-by: Jan Kara --- fs/quota/dquot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 2a9dc3fb491c..d91aecc939c9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -572,7 +572,8 @@ int dquot_scan_active(struct super_block *sb, struct dquot *dquot, *old_dquot = NULL; int ret = 0; - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); + spin_lock(&dq_list_lock); list_for_each_entry(dquot, &inuse_list, dq_inuse) { if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) @@ -603,7 +604,6 @@ int dquot_scan_active(struct super_block *sb, spin_unlock(&dq_list_lock); out: dqput(old_dquot); - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return ret; } EXPORT_SYMBOL(dquot_scan_active); From 5f530de63cfc6ca8571cbdf58af63fb166cc6517 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Nov 2016 14:35:26 +0100 Subject: [PATCH 06/10] ocfs2: Use s_umount for quota recovery protection Currently we use dqonoff_mutex to serialize quota recovery protection and turning of quotas on / off. Use s_umount semaphore instead. Tested-by: Eric Ren Signed-off-by: Jan Kara --- fs/ocfs2/quota_local.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 8a54fd8a4fa5..32c5a40c1257 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -454,7 +454,7 @@ out: /* Sync changes in local quota file into global quota file and * reinitialize local quota file. * The function expects local quota file to be already locked and - * dqonoff_mutex locked. */ + * s_umount locked in shared mode. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -597,7 +597,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -674,7 +674,7 @@ out_put: break; } out: - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + up_read(&sb->s_umount); kfree(rec); return status; } @@ -840,7 +840,10 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) } ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); - /* dqonoff_mutex protects us against racing with recovery thread... 
*/ + /* + * s_umount held in exclusive mode protects us against racing with + * recovery thread... + */ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); mark_clean = 0; From c3b004460d77bf3f980d877be539016f2df4df12 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 23 Nov 2016 14:04:55 +0100 Subject: [PATCH 07/10] quota: Remove dqonoff_mutex The only places that were grabbing dqonoff_mutex are functions turning quotas on and off and these are properly serialized using s_umount semaphore. Remove dqonoff_mutex. Signed-off-by: Jan Kara --- fs/quota/dquot.c | 80 +++++++++++++------------------------------ fs/super.c | 1 - include/linux/quota.h | 1 - 3 files changed, 23 insertions(+), 59 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d91aecc939c9..550f78f2b7c7 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -119,8 +119,7 @@ * spinlock to internal buffers before writing. * * Lock ordering (including related VFS locks) is the following: - * dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex - * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. + * s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); @@ -933,7 +932,7 @@ static int dqinit_needed(struct inode *inode, int type) return 0; } -/* This routine is guarded by dqonoff_mutex mutex */ +/* This routine is guarded by s_umount semaphore */ static void add_dquot_ref(struct super_block *sb, int type) { struct inode *inode, *old_inode = NULL; @@ -2108,18 +2107,14 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) DQUOT_USAGE_ENABLED))) return -EINVAL; - /* We need to serialize quota_off() for device */ - mutex_lock(&dqopt->dqonoff_mutex); - /* * Skip everything if there's nothing to do. We have to do this because * sometimes we are called when fill_super() failed and calling * sync_fs() in such cases does no good. 
*/ - if (!sb_any_quota_loaded(sb)) { - mutex_unlock(&dqopt->dqonoff_mutex); + if (!sb_any_quota_loaded(sb)) return 0; - } + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { toputinode[cnt] = NULL; if (type != -1 && cnt != type) @@ -2173,7 +2168,6 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) dqopt->info[cnt].dqi_bgrace = 0; dqopt->ops[cnt] = NULL; } - mutex_unlock(&dqopt->dqonoff_mutex); /* Skip syncing and setting flags if quota files are hidden */ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) @@ -2191,19 +2185,13 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) if (toputinode[cnt]) { - mutex_lock(&dqopt->dqonoff_mutex); - /* If quota was reenabled in the meantime, we have - * nothing to do */ - if (!sb_has_quota_loaded(sb, cnt)) { - inode_lock(toputinode[cnt]); - toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | + WARN_ON_ONCE(sb_has_quota_loaded(sb, cnt)); + inode_lock(toputinode[cnt]); + toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); - truncate_inode_pages(&toputinode[cnt]->i_data, - 0); - inode_unlock(toputinode[cnt]); - mark_inode_dirty_sync(toputinode[cnt]); - } - mutex_unlock(&dqopt->dqonoff_mutex); + truncate_inode_pages(&toputinode[cnt]->i_data, 0); + inode_unlock(toputinode[cnt]); + mark_inode_dirty_sync(toputinode[cnt]); } if (sb->s_bdev) invalidate_bdev(sb->s_bdev); @@ -2275,6 +2263,10 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, error = -EINVAL; goto out_fmt; } + if (sb_has_quota_loaded(sb, type)) { + error = -EBUSY; + goto out_fmt; + } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* As we bypass the pagecache we must now flush all the @@ -2286,11 +2278,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, sync_filesystem(sb); invalidate_bdev(sb->s_bdev); } - mutex_lock(&dqopt->dqonoff_mutex); - if (sb_has_quota_loaded(sb, type)) { - error = -EBUSY; - goto out_lock; - } if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { /* We don't want quota and atime on quota files (deadlocks @@ -2311,7 +2298,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, error = -EIO; dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) - goto out_lock; + goto out_file_flags; error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) goto out_file_init; @@ -2334,14 +2321,13 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, spin_unlock(&dq_state_lock); add_dquot_ref(sb, type); - mutex_unlock(&dqopt->dqonoff_mutex); return 0; out_file_init: dqopt->files[type] = NULL; iput(inode); -out_lock: +out_file_flags: if (oldflags != -1) { inode_lock(inode); /* Set the flags back (in the case of accidental quotaon() @@ -2350,7 +2336,6 @@ out_lock: inode->i_flags |= oldflags; inode_unlock(inode); } - mutex_unlock(&dqopt->dqonoff_mutex); out_fmt: put_quota_format(fmt); @@ -2372,12 +2357,9 @@ int dquot_resume(struct super_block *sb, int type) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - - mutex_lock(&dqopt->dqonoff_mutex); - if (!sb_has_quota_suspended(sb, cnt)) { - mutex_unlock(&dqopt->dqonoff_mutex); + if (!sb_has_quota_suspended(sb, cnt)) continue; - } + inode = dqopt->files[cnt]; dqopt->files[cnt] = NULL; spin_lock(&dq_state_lock); @@ -2386,7 +2368,6 @@ int dquot_resume(struct super_block *sb, int type) cnt); dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt); spin_unlock(&dq_state_lock); - 
mutex_unlock(&dqopt->dqonoff_mutex); flags = dquot_generic_flag(flags, cnt); ret = vfs_load_quota_inode(inode, cnt, @@ -2422,9 +2403,7 @@ EXPORT_SYMBOL(dquot_quota_on); int dquot_enable(struct inode *inode, int type, int format_id, unsigned int flags) { - int ret = 0; struct super_block *sb = inode->i_sb; - struct quota_info *dqopt = sb_dqopt(sb); /* Just unsuspend quotas? */ BUG_ON(flags & DQUOT_SUSPENDED); @@ -2436,31 +2415,18 @@ int dquot_enable(struct inode *inode, int type, int format_id, return 0; /* Just updating flags needed? */ if (sb_has_quota_loaded(sb, type)) { - mutex_lock(&dqopt->dqonoff_mutex); - /* Now do a reliable test... */ - if (!sb_has_quota_loaded(sb, type)) { - mutex_unlock(&dqopt->dqonoff_mutex); - goto load_quota; - } if (flags & DQUOT_USAGE_ENABLED && - sb_has_quota_usage_enabled(sb, type)) { - ret = -EBUSY; - goto out_lock; - } + sb_has_quota_usage_enabled(sb, type)) + return -EBUSY; if (flags & DQUOT_LIMITS_ENABLED && - sb_has_quota_limits_enabled(sb, type)) { - ret = -EBUSY; - goto out_lock; - } + sb_has_quota_limits_enabled(sb, type)) + return -EBUSY; spin_lock(&dq_state_lock); sb_dqopt(sb)->flags |= dquot_state_flag(flags, type); spin_unlock(&dq_state_lock); -out_lock: - mutex_unlock(&dqopt->dqonoff_mutex); - return ret; + return 0; } -load_quota: return vfs_load_quota_inode(inode, type, format_id, flags); } EXPORT_SYMBOL(dquot_enable); diff --git a/fs/super.c b/fs/super.c index f7f724230e2b..1709ed029a2c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -244,7 +244,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, mutex_init(&s->s_vfs_rename_mutex); lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); mutex_init(&s->s_dquot.dqio_mutex); - mutex_init(&s->s_dquot.dqonoff_mutex); s->s_maxbytes = MAX_NON_LFS; s->s_op = &default_op; s->s_time_gran = 1000000000; diff --git a/include/linux/quota.h b/include/linux/quota.h index 55107a8ff887..b281d198ee5b 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -520,7 +520,6 @@ static inline void quota_send_warning(struct kqid qid, dev_t dev, struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ - struct mutex dqonoff_mutex; /* Serialize quotaon & quotaoff */ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ From b46dc033818d3293ecc49dc258e2efb603c80bd7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 6 Dec 2016 15:59:01 -0800 Subject: [PATCH 08/10] ext2: reject inodes with negative size Don't load an inode with a negative size; this causes integer overflow problems in the VFS. Signed-off-by: Darrick J. 
Wong Signed-off-by: Jan Kara --- fs/ext2/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 41b8b44a391c..01f6d4be7f9a 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1476,6 +1476,10 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; else ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + if (i_size_read(inode) < 0) { + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_dtime = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_state = 0; From 5716863e0f8251d3360d4cbfc0e44e08007075df Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Dec 2016 16:08:41 +0100 Subject: [PATCH 09/10] fsnotify: Fix possible use-after-free in inode iteration on umount fsnotify_unmount_inodes() plays complex tricks to pin next inode in the sb->s_inodes list when iterating over all inodes. Furthermore the code has a bug that if the current inode is the last on i_sb_list that does not have e.g. I_FREEING set, then we leave next_i pointing to inode which may get removed from the i_sb_list once we drop s_inode_list_lock thus resulting in use-after-free issues (usually manifesting as infinite looping in fsnotify_unmount_inodes()). Fix the problem by keeping current inode pinned somewhat longer. Then we can make the code much simpler and standard. CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/notify/inode_mark.c | 45 +++++++++--------------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 741077deef3b..a3645249f7ec 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -150,12 +150,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, */ void fsnotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; + struct inode *inode, *iput_inode = NULL; spin_lock(&sb->s_inode_list_lock); - list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) { - struct inode *need_iput_tmp; - + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { /* * We cannot __iget() an inode in state I_FREEING, * I_WILL_FREE, or I_NEW which is fine because by that point @@ -178,49 +176,24 @@ void fsnotify_unmount_inodes(struct super_block *sb) continue; } - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) - __iget(inode); - else - need_iput_tmp = NULL; + __iget(inode); spin_unlock(&inode->i_lock); - - /* In case the dropping of a reference would nuke next_i. */ - while (&next_i->i_sb_list != &sb->s_inodes) { - spin_lock(&next_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && - atomic_read(&next_i->i_count)) { - __iget(next_i); - need_iput = next_i; - spin_unlock(&next_i->i_lock); - break; - } - spin_unlock(&next_i->i_lock); - next_i = list_next_entry(next_i, i_sb_list); - } - - /* - * We can safely drop s_inode_list_lock here because either - * we actually hold references on both inode and next_i or - * end of list. Also no new inodes will be added since the - * umount has begun. 
- */ spin_unlock(&sb->s_inode_list_lock); - if (need_iput_tmp) - iput(need_iput_tmp); + if (iput_inode) + iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); fsnotify_inode_delete(inode); - iput(inode); + iput_inode = inode; spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); } From 2700e6067c72a99d1b7037692da0145ac44623c4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Dec 2016 14:01:39 +0100 Subject: [PATCH 10/10] quota: Fix bogus warning in dquot_disable() dquot_disable() was warning when sb_has_quota_loaded() was true when invalidating page cache for quota files. The thinking behind this warning was that we must have raced with somebody else turning quotas on and this should not happen because all places modifying quota state must hold s_umount exclusively now. However sb_has_quota_loaded() can be also true at this point when we are just suspending quotas on remount read-only. Just restore the behavior to situation before commit c3b004460d77 ("quota: Remove dqonoff_mutex") which introduced the warning. The code in dquot_disable() can be further simplified with the new locking of quota state changes however let's leave that to a separate commit that can get more testing exposure. Fixes: c3b004460d77bf3f980d877be539016f2df4df12 Signed-off-by: Jan Kara --- fs/quota/dquot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 550f78f2b7c7..1ed5494aa773 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2184,8 +2184,8 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) * must also discard the blockdev buffers so that we see the * changes done by userspace on the next quotaon() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (toputinode[cnt]) { - WARN_ON_ONCE(sb_has_quota_loaded(sb, cnt)); + /* This can happen when suspending quotas on remount-ro... */ + if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) { inode_lock(toputinode[cnt]); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA);