From b842e240f27678aa5d71611cddc8d17a93fb0caf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 10 May 2007 19:02:07 -0400 Subject: [PATCH 01/17] locks: reverse order of posix_locks_conflict() arguments The first argument to posix_locks_conflict() is meant to be a lock request, and the second a lock from an inode's lock request. It doesn't really make a difference which order you call them in, since the only asymmetric test in posix_lock_conflict() is the check whether the second argument is a posix lock--and every caller already does that check for some reason. But may as well fix posix_test_lock() to call posix_locks_conflict() with the arguments in the same order as everywhere else. Signed-off-by: "J. Bruce Fields" --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index c795eaaf6c4c..51bae6227c25 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -668,7 +668,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { if (!IS_POSIX(cfl)) continue; - if (posix_locks_conflict(cfl, fl)) + if (posix_locks_conflict(fl, cfl)) break; } if (cfl) From 526985b9dd6ef7716b87f5fe6f0e2438ea3a89c7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 Nov 2006 16:54:36 -0500 Subject: [PATCH 02/17] locks: kill redundant local variable There's no need for another variable local to this loop; we can use the variable (of the same name!) already declared at the top of the function, and not used till later (at which point it's initialized, so this is safe). Signed-off-by: J. Bruce Fields --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index 51bae6227c25..efe1affe6bed 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -819,7 +819,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str lock_kernel(); if (request->fl_type != F_UNLCK) { for_each_lock(inode, before) { - struct file_lock *fl = *before; + fl = *before; if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(request, fl)) From 84d535ade62b6f8ce852745731ad6200c46b977c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 11 Sep 2007 16:38:13 +0400 Subject: [PATCH 03/17] Memory shortage can result in inconsistent flocks state When the flock_lock_file() is called to change the flock from F_RDLCK to F_WRLCK or vice versa the existing flock can be removed without appropriate warning. Look: for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) break; if (IS_LEASE(fl)) continue; if (filp != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; found = 1; locks_delete_lock(before); <<<<<< ! break; } if after this point the subsequent locks_alloc_lock() will fail the return code will be -ENOMEM, but the existing lock is already removed. This is a known feature that such "re-locking" is not atomic, but in the racy case the file should stay locked (although by some other process), but in this case the file will be unlocked. The proposal is to prepare the lock in advance keeping no chance to fail in the future code. Found during making the flocks pid-namespaces aware. (Note: Thanks to Reuben Farrelly for finding a bug in an earlier version of this patch.) Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields Cc: Reuben Farrelly --- fs/locks.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index efe1affe6bed..6e22c8129a80 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -733,6 +733,15 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) lock_kernel(); if (request->fl_flags & FL_ACCESS) goto find_conflict; + + if (request->fl_type != F_UNLCK) { + error = -ENOMEM; + new_fl = locks_alloc_lock(); + if (new_fl == NULL) + goto out; + error = 0; + } + for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) @@ -754,10 +763,6 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto out; } - error = -ENOMEM; - new_fl = locks_alloc_lock(); - if (new_fl == NULL) - goto out; /* * If a higher-priority process was blocked on the old file lock, * give it the opportunity to lock the file. From 02888f41e9d7fa95d1f5b2f76e0f0af6ea8198cc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 12 Sep 2007 15:45:07 -0400 Subject: [PATCH 04/17] locks: fix flock_lock_file() comment This comment wasn't updated when lease support was added, and it makes essentially the same mistake that the code made before a recent bugfix. Signed-off-by: J. Bruce Fields --- fs/locks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 6e22c8129a80..c7c69d29a576 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -715,8 +715,7 @@ next_task: } /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks - * at the head of the list, but that's secret knowledge known only to - * flock_lock_file and posix_lock_file. + * after any leases, but before any posix locks. * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return From f0c1cd0eaf0b127356c2c09e40305453bc361b0f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 19 Sep 2007 16:44:07 +0400 Subject: [PATCH 05/17] Use list_first_entry in locks_wake_up_blocks This routine deletes all the elements from the list with the "while (!list_empty())" loop, and we already have a list_first_entry() macro to help it look nicer :) Signed-off-by: Pavel Emelyanov --- fs/locks.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/locks.c b/fs/locks.c index c7c69d29a576..282b6c11670a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -534,7 +534,9 @@ static void locks_insert_block(struct file_lock *blocker, static void locks_wake_up_blocks(struct file_lock *blocker) { while (!list_empty(&blocker->fl_block)) { - struct file_lock *waiter = list_entry(blocker->fl_block.next, + struct file_lock *waiter; + + waiter = list_first_entry(&blocker->fl_block, struct file_lock, fl_block); __locks_delete_block(waiter); if (waiter->fl_lmops && waiter->fl_lmops->fl_notify) From 85c59580b30c82aa771aa33b37217a6b6851bc14 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 20 Sep 2007 12:45:02 +0400 Subject: [PATCH 06/17] locks: Fix potential OOPS in generic_setlease() This code is run under lock_kernel(), which is dropped during sleeping operations, so the following race is possible: CPU1: CPU2: vfs_setlease(); vfs_setlease(); lock_kernel(); lock_kernel(); /* spin */ generic_setlease(): ... for (before = ...) /* here we found some lease after * which we will insert the new one */ fl = locks_alloc_lock(); /* go to sleep in this allocation and * drop the BKL */ generic_setlease(): ... for (before = ...) /* here we find the "before" pointing * at the one we found on CPU1 */ ->fl_change(my_before, arg); lease_modify(); locks_free_lock(); /* and we freed it */ ... unlock_kernel(); locks_insert_lock(before, fl); /* OOPS! We have just tried to add the lease * at the tail of already removed one */ The similar races are already handled in other code - all the allocations are performed before any checks/updates. Thanks to Kamalesh Babulal for testing and for a bug report on an earlier version. Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields Cc: Kamalesh Babulal --- fs/locks.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 282b6c11670a..43dbc7f566fa 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1343,6 +1343,7 @@ int fcntl_getlease(struct file *filp) int generic_setlease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; + struct file_lock *new_fl = NULL; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; @@ -1369,6 +1370,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) || (atomic_read(&inode->i_count) > 1))) goto out; + error = -ENOMEM; + new_fl = locks_alloc_lock(); + if (new_fl == NULL) + goto out; + /* * At this point, we know that if there is an exclusive * lease on this file, then we hold it on this filp @@ -1411,18 +1417,15 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if (!leases_enable) goto out; - error = -ENOMEM; - fl = locks_alloc_lock(); - if (fl == NULL) - goto out; + locks_copy_lock(new_fl, lease); + locks_insert_lock(before, new_fl); - locks_copy_lock(fl, lease); + *flp = new_fl; + return 0; - locks_insert_lock(before, fl); - - *flp = fl; - error = 0; out: + if (new_fl != NULL) + locks_free_lock(new_fl); return error; } EXPORT_SYMBOL(generic_setlease); From 4f3b19ca41fbe572e3d44caf516c215b286fe2a6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 24 Sep 2007 18:52:09 -0400 Subject: [PATCH 07/17] Documentation: move mandatory locking documentation to filesystems/ Shouldn't this mandatory-locking documentation be in the Documentation/filesystems directory? Give it a more descriptive name while we're at it, and update 00-INDEX with a more inclusive description of Documentation/filesystems (which has already talked about more than just individual filesystems). Signed-off-by: J. Bruce Fields Acked-by: Randy Dunlap --- Documentation/00-INDEX | 4 +--- Documentation/filesystems/00-INDEX | 2 ++ .../mandatory-locking.txt} | 0 Documentation/locks.txt | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) rename Documentation/{mandatory.txt => filesystems/mandatory-locking.txt} (100%) diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 43e89b1537d9..910473cb1c1c 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -145,7 +145,7 @@ fb/ feature-removal-schedule.txt - list of files and features that are going to be removed. filesystems/ - - directory with info on the various filesystems that Linux supports. + - info on the vfs and the various filesystems that Linux supports. firmware_class/ - request_firmware() hotplug interface info. floppy.txt @@ -240,8 +240,6 @@ m68k/ - directory with info about Linux on Motorola 68k architecture. magic-number.txt - list of magic numbers used to mark/protect kernel data structures. -mandatory.txt - - info on the Linux implementation of Sys V mandatory file locking. mca.txt - info on supporting Micro Channel Architecture (e.g. PS/2) systems. md.txt diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 59db1bca7027..e801076812a4 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -52,6 +52,8 @@ isofs.txt - info and mount options for the ISO 9660 (CDROM) filesystem. jfs.txt - info and mount options for the JFS filesystem. +mandatory-locking.txt + - info on the Linux implementation of Sys V mandatory file locking. ncpfs.txt - info on Novell Netware(tm) filesystem using NCP protocol. ntfs.txt diff --git a/Documentation/mandatory.txt b/Documentation/filesystems/mandatory-locking.txt similarity index 100% rename from Documentation/mandatory.txt rename to Documentation/filesystems/mandatory-locking.txt diff --git a/Documentation/locks.txt b/Documentation/locks.txt index e3b402ef33bd..fab857accbd6 100644 --- a/Documentation/locks.txt +++ b/Documentation/locks.txt @@ -53,11 +53,11 @@ fcntl(), with all the problems that implies. 1.3 Mandatory Locking As A Mount Option --------------------------------------- -Mandatory locking, as described in 'Documentation/mandatory.txt' was prior -to this release a general configuration option that was valid for all -mounted filesystems. This had a number of inherent dangers, not the least -of which was the ability to freeze an NFS server by asking it to read a -file for which a mandatory lock existed. +Mandatory locking, as described in 'Documentation/filesystems/mandatory.txt' +was prior to this release a general configuration option that was valid for +all mounted filesystems. This had a number of inherent dangers, not the +least of which was the ability to freeze an NFS server by asking it to read +a file for which a mandatory lock existed. From this release of the kernel, mandatory locking can be turned on and off on a per-filesystem basis, using the mount options 'mand' and 'nomand'. From 9efa68ed079af97f4e9477eadef567ffe64f7afc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 25 Sep 2007 11:57:19 -0400 Subject: [PATCH 08/17] locks: add warning about mandatory locking races The mandatory file locking implementation has long-standing races that probably render it useless. I know of no plans to fix them. Till we do, we should at least warn people. Signed-off-by: J. Bruce Fields --- .../filesystems/mandatory-locking.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/filesystems/mandatory-locking.txt b/Documentation/filesystems/mandatory-locking.txt index bc449d49eee5..0979d1d2ca8b 100644 --- a/Documentation/filesystems/mandatory-locking.txt +++ b/Documentation/filesystems/mandatory-locking.txt @@ -3,7 +3,26 @@ Andy Walker 15 April 1996 + (Updated September 2007) +0. Why you should avoid mandatory locking +----------------------------------------- + +The Linux implementation is prey to a number of difficult-to-fix race +conditions which in practice make it not dependable: + + - The write system call checks for a mandatory lock only once + at its start. It is therefore possible for a lock request to + be granted after this check but before the data is modified. + A process may then see file data change even while a mandatory + lock was held. + - Similarly, an exclusive lock may be granted on a file after + the kernel has decided to proceed with a read, but before the + read has actually completed, and the reading process may see + the file data in a state which should not have been visible + to it. + - Similar races make the claimed mutual exclusion between lock + and mmap similarly unreliable. 1. What is mandatory locking? ------------------------------ From 98257af5a2ad0c5b502ebd07094d9fd8ce87acef Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 30 Sep 2007 22:18:55 -0400 Subject: [PATCH 09/17] Documentation: move locks.txt in filesystems/ This documentation (about file locking) belongs in filesystems/. Signed-off-by: J. Bruce Fields --- Documentation/00-INDEX | 2 -- Documentation/filesystems/00-INDEX | 2 ++ Documentation/{ => filesystems}/locks.txt | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename Documentation/{ => filesystems}/locks.txt (100%) diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 910473cb1c1c..cc10ce7dc339 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -230,8 +230,6 @@ local_ops.txt - semantics and behavior of local atomic operations. lockdep-design.txt - documentation on the runtime locking correctness validator. -locks.txt - - info on file locking implementations, flock() vs. fcntl(), etc. logo.gif - full colour GIF image of Linux logo (penguin - Tux). logo.txt diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index e801076812a4..599593a17067 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -52,6 +52,8 @@ isofs.txt - info and mount options for the ISO 9660 (CDROM) filesystem. jfs.txt - info and mount options for the JFS filesystem. +locks.txt + - info on file locking implementations, flock() vs. fcntl(), etc. mandatory-locking.txt - info on the Linux implementation of Sys V mandatory file locking. ncpfs.txt diff --git a/Documentation/locks.txt b/Documentation/filesystems/locks.txt similarity index 100% rename from Documentation/locks.txt rename to Documentation/filesystems/locks.txt From a16877ca9cec211708a161057a7cbfbf2cbc3a53 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:11 -0700 Subject: [PATCH 10/17] Cleanup macros for distinguishing mandatory locks The combination of S_ISGID bit set and S_IXGRP bit unset is used to mark the inode as "mandatory lockable" and there's a macro for this check called MANDATORY_LOCK(inode). However, fs/locks.c and some filesystems still perform the explicit i_mode checking. Besides, Andrew pointed out, that this macro is buggy itself, as it dereferences the inode arg twice. Convert this macro into static inline function and switch its users to it, making the code shorter and more readable. The __mandatory_lock() helper is to be used in places where the IS_MANDLOCK() for superblock is already known to be true. Signed-off-by: Pavel Emelyanov Cc: Trond Myklebust Cc: "J. Bruce Fields" Cc: David Howells Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: Steven Whitehouse Signed-off-by: Andrew Morton --- fs/locks.c | 14 ++++---------- fs/nfsd/nfs4state.c | 2 +- fs/nfsd/vfs.c | 2 +- fs/read_write.c | 2 +- include/linux/fs.h | 21 +++++++++++++++++---- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 43dbc7f566fa..9a3fe0d8285b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1119,7 +1119,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, * If we've been sleeping someone might have * changed the permissions behind our back. */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(inode)) continue; } @@ -1761,9 +1761,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, /* Don't allow mandatory locks on files that may be memory mapped * and shared. */ - if (IS_MANDLOCK(inode) && - (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - mapping_writably_mapped(filp->f_mapping)) { + if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { error = -EAGAIN; goto out; } @@ -1887,9 +1885,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, /* Don't allow mandatory locks on files that may be memory mapped * and shared. */ - if (IS_MANDLOCK(inode) && - (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - mapping_writably_mapped(filp->f_mapping)) { + if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { error = -EAGAIN; goto out; } @@ -2083,9 +2079,7 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) out += sprintf(out, "%6s %s ", (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", (inode == NULL) ? "*NOINODE*" : - (IS_MANDLOCK(inode) && - (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ? - "MANDATORY" : "ADVISORY "); + mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { if (fl->fl_type & LOCK_MAND) { out += sprintf(out, "FLOCK MSNFS "); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3f559700788f..3c028b9c6e0e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2044,7 +2044,7 @@ static inline int io_during_grace_disallowed(struct inode *inode, int flags) { return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) - && MANDATORY_LOCK(inode); + && mandatory_lock(inode); } /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7867151ebb83..9152f87eea18 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -65,7 +65,7 @@ * locks on them because there is no way to know if the accesser has * the lock. */ -#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i)) +#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && mandatory_lock(i)) /* * This is a cache of readahead params that help us choose the proper diff --git a/fs/read_write.c b/fs/read_write.c index 507ddff48a9a..124693e8d3fa 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -205,7 +205,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) goto Einval; - if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) { + if (unlikely(inode->i_flock && mandatory_lock(inode))) { int retval = locks_mandatory_area( read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); diff --git a/include/linux/fs.h b/include/linux/fs.h index 16421f662a7a..f5075e0e7301 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1369,12 +1369,25 @@ extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size * Candidates for mandatory locking have the setgid bit set * but no group execute bit - an otherwise meaningless combination. */ -#define MANDATORY_LOCK(inode) \ - (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + +static inline int __mandatory_lock(struct inode *ino) +{ + return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; +} + +/* + * ... and these candidates should be on MS_MANDLOCK mounted fs, + * otherwise these will be advisory locks + */ + +static inline int mandatory_lock(struct inode *ino) +{ + return IS_MANDLOCK(ino) && __mandatory_lock(ino); +} static inline int locks_verify_locked(struct inode *inode) { - if (MANDATORY_LOCK(inode)) + if (mandatory_lock(inode)) return locks_mandatory_locked(inode); return 0; } @@ -1385,7 +1398,7 @@ static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) { - if (inode->i_flock && MANDATORY_LOCK(inode)) + if (inode->i_flock && mandatory_lock(inode)) return locks_mandatory_area( FLOCK_VERIFY_WRITE, inode, filp, size < inode->i_size ? size : inode->i_size, From 7afaac6202782ec28f2039503bdaef666834d60c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:13 -0700 Subject: [PATCH 11/17] GFS2: clean up explicit check for mandatory locks The __mandatory_lock(inode) function makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: Steven Whitehouse Signed-off-by: Andrew Morton --- fs/gfs2/ops_file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 94d76ace0b95..28773cab4a3d 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -535,7 +535,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; if (sdp->sd_args.ar_localflocks) { @@ -637,7 +637,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; - if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; if (sdp->sd_args.ar_localflocks) From 66abe5f257e719547744fdb8691cf5d20603f051 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:13 -0700 Subject: [PATCH 12/17] 9PFS: clean up explicit check for mandatory locks The __mandatory_lock(inode) macro makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton --- fs/9p/vfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 2a40c2946d0a..716691689fd5 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -105,7 +105,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); /* No mandatory locks */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(inode)) return -ENOLCK; if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { From fc5846e555177c2ae01bcded7fddf60cb10dcfd0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:14 -0700 Subject: [PATCH 13/17] AFS: clean up explicit check for mandatory locks The __mandatory_lock(inode) macro makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: David Howells Signed-off-by: Andrew Morton --- fs/afs/flock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/afs/flock.c b/fs/afs/flock.c index af6952e39a18..210acafe4a9b 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -524,8 +524,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl) (long long) fl->fl_start, (long long) fl->fl_end); /* AFS doesn't support mandatory locks */ - if ((vnode->vfs_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - fl->fl_type != F_UNLCK) + if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK) return -ENOLCK; if (IS_GETLK(cmd)) From dfad9441be82f1eadc3fa3f1bbc93f93d48d1bdf Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:15 -0700 Subject: [PATCH 14/17] NFS: clean up explicit check for mandatory locks The __mandatory_lock(inode) macro makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton --- fs/nfs/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 579cf8a7d4a7..9c98ccbf9de0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -522,8 +522,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - fl->fl_type != F_UNLCK) + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) return -ENOLCK; if (IS_GETLK(cmd)) From 094f2825218fec1b240cb8537d2d0a10edf5ddc9 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 2 Oct 2007 11:21:34 -0700 Subject: [PATCH 15/17] fs/locks.c: use list_for_each_entry() instead of list_for_each() fs/locks.c: use list_for_each_entry() instead of list_for_each() in posix_locks_deadlock() and get_locks_status() Signed-off-by: Matthias Kaehlcke Signed-off-by: Andrew Morton --- fs/locks.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 9a3fe0d8285b..c17bc00b1e8d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -700,13 +700,12 @@ EXPORT_SYMBOL(posix_test_lock); static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { - struct list_head *tmp; + struct file_lock *fl; next_task: if (posix_same_owner(caller_fl, block_fl)) return 1; - list_for_each(tmp, &blocked_list) { - struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + list_for_each_entry(fl, &blocked_list, fl_link) { if (posix_same_owner(fl, block_fl)) { fl = fl->fl_next; block_fl = fl; @@ -2164,24 +2163,22 @@ static void move_lock_status(char **p, off_t* pos, off_t offset) int get_locks_status(char *buffer, char **start, off_t offset, int length) { - struct list_head *tmp; + struct file_lock *fl; char *q = buffer; off_t pos = 0; int i = 0; lock_kernel(); - list_for_each(tmp, &file_lock_list) { - struct list_head *btmp; - struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + list_for_each_entry(fl, &file_lock_list, fl_link) { + struct file_lock *bfl; + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - list_for_each(btmp, &fl->fl_block) { - struct file_lock *bfl = list_entry(btmp, - struct file_lock, fl_block); + list_for_each_entry(bfl, &fl->fl_block, fl_block) { lock_get_status(q, bfl, i, " ->"); move_lock_status(&q, &pos, offset); From 7f8ada98d9edd83d6ebd01e431e15b024a4a3dc4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:15 -0700 Subject: [PATCH 16/17] Rework /proc/locks via seq_files and seq_list helpers Currently /proc/locks is shown with a proc_read function, but its behavior is rather complex as it has to manually handle current offset and buffer length. On the other hand, files that show objects from lists can be easily reimplemented using the sequential files and the seq_list_XXX() helpers. This saves (as usually) 16 lines of code and more than 200 from the .text section. [akpm@linux-foundation.org: no externs in C] [akpm@linux-foundation.org: warning fixes] Signed-off-by: Pavel Emelyanov Cc: "J. Bruce Fields" Cc: Trond Myklebust Signed-off-by: Andrew Morton --- fs/locks.c | 130 +++++++++++++++++++------------------------- fs/proc/proc_misc.c | 19 ++++--- include/linux/fs.h | 1 + 3 files changed, 66 insertions(+), 84 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index c17bc00b1e8d..7f9a3ea47418 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2066,134 +2066,114 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) EXPORT_SYMBOL_GPL(vfs_cancel_lock); -static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) +#ifdef CONFIG_PROC_FS +#include + +static void lock_get_status(struct seq_file *f, struct file_lock *fl, + int id, char *pfx) { struct inode *inode = NULL; if (fl->fl_file != NULL) inode = fl->fl_file->f_path.dentry->d_inode; - out += sprintf(out, "%d:%s ", id, pfx); + seq_printf(f, "%d:%s ", id, pfx); if (IS_POSIX(fl)) { - out += sprintf(out, "%6s %s ", + seq_printf(f, "%6s %s ", (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", (inode == NULL) ? "*NOINODE*" : mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { if (fl->fl_type & LOCK_MAND) { - out += sprintf(out, "FLOCK MSNFS "); + seq_printf(f, "FLOCK MSNFS "); } else { - out += sprintf(out, "FLOCK ADVISORY "); + seq_printf(f, "FLOCK ADVISORY "); } } else if (IS_LEASE(fl)) { - out += sprintf(out, "LEASE "); + seq_printf(f, "LEASE "); if (fl->fl_type & F_INPROGRESS) - out += sprintf(out, "BREAKING "); + seq_printf(f, "BREAKING "); else if (fl->fl_file) - out += sprintf(out, "ACTIVE "); + seq_printf(f, "ACTIVE "); else - out += sprintf(out, "BREAKER "); + seq_printf(f, "BREAKER "); } else { - out += sprintf(out, "UNKNOWN UNKNOWN "); + seq_printf(f, "UNKNOWN UNKNOWN "); } if (fl->fl_type & LOCK_MAND) { - out += sprintf(out, "%s ", + seq_printf(f, "%s ", (fl->fl_type & LOCK_READ) ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); } else { - out += sprintf(out, "%s ", + seq_printf(f, "%s ", (fl->fl_type & F_INPROGRESS) ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); } if (inode) { #ifdef WE_CAN_BREAK_LSLK_NOW - out += sprintf(out, "%d %s:%ld ", fl->fl_pid, + seq_printf(f, "%d %s:%ld ", fl->fl_pid, inode->i_sb->s_id, inode->i_ino); #else /* userspace relies on this representation of dev_t ;-( */ - out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid, + seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); #endif } else { - out += sprintf(out, "%d :0 ", fl->fl_pid); + seq_printf(f, "%d :0 ", fl->fl_pid); } if (IS_POSIX(fl)) { if (fl->fl_end == OFFSET_MAX) - out += sprintf(out, "%Ld EOF\n", fl->fl_start); + seq_printf(f, "%Ld EOF\n", fl->fl_start); else - out += sprintf(out, "%Ld %Ld\n", fl->fl_start, - fl->fl_end); + seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); } else { - out += sprintf(out, "0 EOF\n"); + seq_printf(f, "0 EOF\n"); } } -static void move_lock_status(char **p, off_t* pos, off_t offset) +static int locks_show(struct seq_file *f, void *v) { - int len; - len = strlen(*p); - if(*pos >= offset) { - /* the complete line is valid */ - *p += len; - *pos += len; - return; - } - if(*pos+len > offset) { - /* use the second part of the line */ - int i = offset-*pos; - memmove(*p,*p+i,len-i); - *p += len-i; - *pos += len; - return; - } - /* discard the complete line */ - *pos += len; + struct file_lock *fl, *bfl; + + fl = list_entry(v, struct file_lock, fl_link); + + lock_get_status(f, fl, (long)f->private, ""); + + list_for_each_entry(bfl, &fl->fl_block, fl_block) + lock_get_status(f, bfl, (long)f->private, " ->"); + + f->private++; + return 0; } -/** - * get_locks_status - reports lock usage in /proc/locks - * @buffer: address in userspace to write into - * @start: ? - * @offset: how far we are through the buffer - * @length: how much to read - */ - -int get_locks_status(char *buffer, char **start, off_t offset, int length) +static void *locks_start(struct seq_file *f, loff_t *pos) { - struct file_lock *fl; - char *q = buffer; - off_t pos = 0; - int i = 0; - lock_kernel(); - list_for_each_entry(fl, &file_lock_list, fl_link) { - struct file_lock *bfl; - - lock_get_status(q, fl, ++i, ""); - move_lock_status(&q, &pos, offset); - - if(pos >= offset+length) - goto done; - - list_for_each_entry(bfl, &fl->fl_block, fl_block) { - lock_get_status(q, bfl, i, " ->"); - move_lock_status(&q, &pos, offset); - - if(pos >= offset+length) - goto done; - } - } -done: - unlock_kernel(); - *start = buffer; - if(q-buffer < length) - return (q-buffer); - return length; + f->private = (void *)1; + return seq_list_start(&file_lock_list, *pos); } +static void *locks_next(struct seq_file *f, void *v, loff_t *pos) +{ + return seq_list_next(v, &file_lock_list, pos); +} + +static void locks_stop(struct seq_file *f, void *v) +{ + unlock_kernel(); +} + +struct seq_operations locks_seq_operations = { + .start = locks_start, + .next = locks_next, + .stop = locks_stop, + .show = locks_show, +}; +#endif + /** * lock_may_read - checks that the region is free of locks * @inode: the inode that is being read diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index bee251cb87c8..c9d6d5f400ad 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -66,7 +66,6 @@ extern int get_stram_list(char *); extern int get_filesystem_list(char *); extern int get_exec_domain_list(char *); extern int get_dma_list(char *); -extern int get_locks_status (char *, char **, off_t, int); static int proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) @@ -617,16 +616,18 @@ static int cmdline_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } -static int locks_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int locks_open(struct inode *inode, struct file *filp) { - int len = get_locks_status(page, start, off, count); - - if (len < count) - *eof = 1; - return len; + return seq_open(filp, &locks_seq_operations); } +static const struct file_operations proc_locks_operations = { + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int execdomains_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -684,7 +685,6 @@ void __init proc_misc_init(void) #endif {"filesystems", filesystems_read_proc}, {"cmdline", cmdline_read_proc}, - {"locks", locks_read_proc}, {"execdomains", execdomains_read_proc}, {NULL,} }; @@ -702,6 +702,7 @@ void __init proc_misc_init(void) entry->proc_fops = &proc_kmsg_operations; } #endif + create_seq_entry("locks", 0, &proc_locks_operations); create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/include/linux/fs.h b/include/linux/fs.h index f5075e0e7301..4f1e8cebea78 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -883,6 +883,7 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); +extern struct seq_operations locks_seq_operations; struct fasync_struct { int magic; From 5e7fc436426b1f9e106f511a049de91c82ec2c53 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 2 Oct 2007 14:18:12 -0400 Subject: [PATCH 17/17] nfsd: remove IS_ISMNDLCK macro This macro is only used in one place; in this place it seems simpler to put open-code it and move the comment to where it's used. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9152f87eea18..085ded6f6d3a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -61,12 +61,6 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP -/* We must ignore files (but only files) which might have mandatory - * locks on them because there is no way to know if the accesser has - * the lock. - */ -#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && mandatory_lock(i)) - /* * This is a cache of readahead params that help us choose the proper * readahead strategy. Initially, we set all readahead parameters to 0 @@ -680,7 +674,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, err = nfserr_perm; if (IS_APPEND(inode) && (access & MAY_WRITE)) goto out; - if (IS_ISMNDLK(inode)) + /* + * We must ignore files (but only files) which might have mandatory + * locks on them because there is no way to know if the accesser has + * the lock. + */ + if (S_ISREG((inode)->i_mode) && mandatory_lock(inode)) goto out; if (!inode->i_fop)