vfs: Separate changing mount flags from full remount

Separate just the changing of mount flags (MS_REMOUNT|MS_BIND) from full remount because the mount data will get parsed with the new fs_context stuff prior to doing a remount - and this causes the syscall to fail under some circumstances. To quote Eric's explanation: [...] mount(..., MS_REMOUNT|MS_BIND, ...) now validates the mount options string, which breaks systemd unit files with ProtectControlGroups=yes (e.g. systemd-networkd.service) when systemd does the following to change a cgroup (v1) mount to read-only: mount(NULL, "/run/systemd/unit-root/sys/fs/cgroup/systemd", NULL, MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND, NULL) ... when the kernel has CONFIG_CGROUPS=y but no cgroup subsystems enabled, since in that case the error "cgroup1: Need name or subsystem set" is hit when the mount options string is empty. Probably it doesn't make sense to validate the mount options string at all in the MS_REMOUNT|MS_BIND case, though maybe you had something else in mind. This is also worthwhile doing because we will need to add a mount_setattr() syscall to take over the remount-bind function. Reported-by: Eric Biggers <ebiggers@google.com> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Reviewed-by: David Howells <dhowells@redhat.com>
2018-11-01 23:07:25 +00:00 · 2018-11-01 23:07:25 +00:00 · 43f5e655ef
--- a/fs/namespace.c
+++ b/fs/namespace.c
@ -246,13 +246,9 @@ out_free_cache:
 * mnt_want/drop_write() will _keep_ the filesystem
 * r/w.
 */
-int __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(struct vfsmount *mnt)
 {
-	if (mnt->mnt_flags & MNT_READONLY)
+	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
 		return 1;
 	if (sb_rdonly(mnt->mnt_sb))
 		return 1;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
@ -508,11 +504,12 @@ static int mnt_make_readonly(struct mount *mnt)
 	return ret;
 }
-static void __mnt_unmake_readonly(struct mount *mnt)
+static int __mnt_unmake_readonly(struct mount *mnt)
 {
 	lock_mount_hash();
 	mnt->mnt.mnt_flags &= ~MNT_READONLY;
 	unlock_mount_hash();
 	return 0;
 }
 int sb_prepare_remount_readonly(struct super_block *sb)
@ -2204,21 +2201,91 @@ out:
 	return err;
 }
-static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+/*
 * Don't allow locked mount flags to be cleared.
 *
 * No locks need to be held here while testing the various MNT_LOCK
 * flags because those flags can never be cleared once they are set.
 */
 static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
 {
-	int error = 0;
+	unsigned int fl = mnt->mnt.mnt_flags;
 	int readonly_request = 0;
-	if (ms_flags & MS_RDONLY)
+	if ((fl & MNT_LOCK_READONLY) &&
-		readonly_request = 1;
+	    !(mnt_flags & MNT_READONLY))
-	if (readonly_request == __mnt_is_readonly(mnt))
+		return false;
 	if ((fl & MNT_LOCK_NODEV) &&
 	    !(mnt_flags & MNT_NODEV))
 		return false;
 	if ((fl & MNT_LOCK_NOSUID) &&
 	    !(mnt_flags & MNT_NOSUID))
 		return false;
 	if ((fl & MNT_LOCK_NOEXEC) &&
 	    !(mnt_flags & MNT_NOEXEC))
 		return false;
 	if ((fl & MNT_LOCK_ATIME) &&
 	    ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
 		return false;
 	return true;
 }
 static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
 {
 	bool readonly_request = (mnt_flags & MNT_READONLY);
 	if (readonly_request == __mnt_is_readonly(&mnt->mnt))
 		return 0;
 	if (readonly_request)
-		error = mnt_make_readonly(real_mount(mnt));
+		return mnt_make_readonly(mnt);
-	else
+
-		__mnt_unmake_readonly(real_mount(mnt));
+	return __mnt_unmake_readonly(mnt);
-	return error;
+}
 /*
 * Update the user-settable attributes on a mount.  The caller must hold
 * sb->s_umount for writing.
 *
 * @mnt:       mount whose flag word is rewritten.
 * @mnt_flags: requested flag word.  Every bit the caller passes ends up in
 *             the stored value; in addition, the mount's current bits that
 *             lie outside MNT_USER_SETTABLE_MASK are carried over.
 *
 * The read-modify-write of mnt->mnt.mnt_flags is done between
 * lock_mount_hash() and unlock_mount_hash().
 */
 static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
 {
 	lock_mount_hash();
 	/* Keep the existing bits that are not user-settable. */
 	mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
 	mnt->mnt.mnt_flags = mnt_flags;
 	/* NOTE(review): presumably signals a change to the namespace's observers - confirm. */
 	touch_mnt_namespace(mnt->mnt_ns);
 	unlock_mount_hash();
 }
 /*
 * Handle reconfiguration of the mountpoint only without alteration of the
 * superblock it refers to.  This is triggered by specifying MS_REMOUNT|MS_BIND
 * to mount(2).
 *
 * No mount-option data is passed to this function, so none is parsed here.
 *
 * @path:      location of the mount to reconfigure; its dentry must be the
 *             root of that mount.
 * @mnt_flags: requested MNT_* flag word.
 *
 * Returns 0 on success, -EINVAL if check_mnt() rejects the mount or @path is
 * not the mount's root, -EPERM if can_change_locked_flags() refuses the
 * request, or otherwise the non-zero result of change_mount_ro_state().
 */
 static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
 {
 	struct super_block *sb = path->mnt->mnt_sb;
 	struct mount *mnt = real_mount(path->mnt);
 	int ret;
 	/* NOTE(review): presumably verifies the mount is in the caller's namespace - confirm. */
 	if (!check_mnt(mnt))
 		return -EINVAL;
 	/* Only the root of a mount may be reconfigured. */
 	if (path->dentry != mnt->mnt.mnt_root)
 		return -EINVAL;
 	/* Checked before s_umount is taken. */
 	if (!can_change_locked_flags(mnt, mnt_flags))
 		return -EPERM;
 	/* s_umount is held for writing across both the r/o change and the flag update. */
 	down_write(&sb->s_umount);
 	ret = change_mount_ro_state(mnt, mnt_flags);
 	/* The remaining attributes are only applied if the r/o transition succeeded. */
 	if (ret == 0)
 		set_mount_attributes(mnt, mnt_flags);
 	up_write(&sb->s_umount);
 	return ret;
 }
 /*
@ -2239,50 +2306,19 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	if (path->dentry != path->mnt->mnt_root)
 		return -EINVAL;
-	/* Don't allow changing of locked mnt flags.
+	if (!can_change_locked_flags(mnt, mnt_flags))
 	 *
 	 * No locks need to be held here while testing the various
 	 * MNT_LOCK flags because those flags can never be cleared
 	 * once they are set.
 	 */
 	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
 	    !(mnt_flags & MNT_READONLY)) {
 		return -EPERM;
 	}
 	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
 	    !(mnt_flags & MNT_NODEV)) {
 		return -EPERM;
 	}
 	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
 	    !(mnt_flags & MNT_NOSUID)) {
 		return -EPERM;
 	}
 	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
 	    !(mnt_flags & MNT_NOEXEC)) {
 		return -EPERM;
 	}
 	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
 	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
 		return -EPERM;
 	}
 	err = security_sb_remount(sb, data);
 	if (err)
 		return err;
 	down_write(&sb->s_umount);
 	if (ms_flags & MS_BIND)
 		err = change_mount_flags(path->mnt, ms_flags);
 	else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 	err = -EPERM;
-	else
+	if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
 		err = do_remount_sb(sb, sb_flags, data, 0);
-	if (!err) {
+		if (!err)
-		lock_mount_hash();
+			set_mount_attributes(mnt, mnt_flags);
 		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
 		mnt->mnt.mnt_flags = mnt_flags;
 		touch_mnt_namespace(mnt->mnt_ns);
 		unlock_mount_hash();
 	}
 	up_write(&sb->s_umount);
 	return err;
@ -2777,7 +2813,9 @@ long do_mount(const char *dev_name, const char __user *dir_name,
 			    SB_LAZYTIME |
 			    SB_I_VERSION);
-	if (flags & MS_REMOUNT)
+	if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
 		retval = do_reconfigure_mnt(&path, mnt_flags);
 	else if (flags & MS_REMOUNT)
 		retval = do_remount(&path, flags, sb_flags, mnt_flags,
 				    data_page);
 	else if (flags & MS_BIND)
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@ -81,7 +81,7 @@ extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
 extern struct vfsmount *mnt_clone_internal(const struct path *path);
-extern int __mnt_is_readonly(struct vfsmount *mnt);
+extern bool __mnt_is_readonly(struct vfsmount *mnt);
 extern bool mnt_may_suid(struct vfsmount *mnt);
 struct path;