fsnotify: mount point listeners list and global mask

Currently, all of the implemented notification systems select which inodes they
care about and receive messages only about those inodes (or the children of
those inodes).  This patch begins to flesh out fsnotify support for listeners
that want to hear notifications for any inode accessed below a given mount
point.  It adds a second list of fsnotify groups to hold these groups and a
second global mask to hold the events of interest to this type of group.
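
Roughly, the new bookkeeping looks like this (a condensed sketch of the
definitions added below in fs/notify/group.c and include/linux/fsnotify_backend.h,
not a complete listing):

    #include <linux/list.h>
    #include <linux/types.h>

    /* all groups registered to receive inode notifications */
    LIST_HEAD(fsnotify_inode_groups);
    /* all groups registered to receive mount point notifications */
    LIST_HEAD(fsnotify_vfsmount_groups);

    /* bitwise OR of every event type (FS_*) wanted by some group on each list */
    __u32 fsnotify_inode_mask;
    __u32 fsnotify_vfsmount_mask;

    /* struct fsnotify_group likewise grows a vfsmount_group_list list_head
     * and an on_vfsmount_group_list flag, mirroring the existing inode ones */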

The reason we want a second group list and mask is that the inode based
notification's should_send_event support makes each group look for a mark on
the given inode.  With a single vfsmount listener that would mean every group
has to take inode->i_lock, look for its mark, fail to find one, and return, on
every operation.  By separating vfsmount listeners from inode listeners, the
inode groups only have to take the inode lock and look for their mark when
there actually is an inode listener.  vfsmount listeners will still have to
grab the lock and look for a mark, but there should be fewer of them, and one
vfsmount listener won't cause the i_lock to be grabbed and released for every
fsnotify group on every I/O operation.
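
With the lists split, the dispatch in fsnotify() ends up looking roughly like
this (condensed from the fs/notify/fsnotify.c hunk below; the SRCU read side
and the event allocation details are trimmed):

    /* cheap mask tests first: nothing is locked in the common case where
     * nobody is listening for this event at all */
    if (!(test_mask & fsnotify_inode_mask) &&
        !(test_mask & fsnotify_vfsmount_mask))
            return;
    if (!(test_mask & to_tell->i_fsnotify_mask) &&
        !needed_by_vfsmount(test_mask, data, data_is))
            return;

    /* inode groups are only walked (and only then does should_send_event
     * take inode->i_lock to look for a mark) if some inode listener cares */
    if (test_mask & to_tell->i_fsnotify_mask)
            list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
                    if (test_mask & group->mask)
                            send_to_group(mask, group, data, data_is,
                                          file_name, cookie, &event, to_tell);

    /* vfsmount listeners get their own walk, so a lone vfsmount listener
     * never drags every inode group through i_lock */
    if (needed_by_vfsmount(test_mask, data, data_is))
            list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
                    if (test_mask & group->mask)
                            send_to_group(mask, group, data, data_is,
                                          file_name, cookie, &event, to_tell);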

Signed-off-by: Eric Paris <eparis@redhat.com>
Author: Eric Paris, 2009-12-17 21:24:23 -05:00
Parent: 4ca763523e
Commit: 7131485a93
5 changed files, 117 additions and 26 deletions

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -21,6 +21,7 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mount.h>
 #include <linux/srcu.h>
 
 #include <linux/fsnotify_backend.h>
@@ -134,6 +135,45 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
+static void send_to_group(__u32 mask,
+                          struct fsnotify_group *group,
+                          void *data, int data_is, const char *file_name,
+                          u32 cookie, struct fsnotify_event **event,
+                          struct inode *to_tell)
+{
+        if (!group->ops->should_send_event(group, to_tell, mask,
+                                           data, data_is))
+                return;
+        if (!*event) {
+                *event = fsnotify_create_event(to_tell, mask, data,
+                                               data_is, file_name,
+                                               cookie, GFP_KERNEL);
+                /*
+                 * shit, we OOM'd and now we can't tell, maybe
+                 * someday someone else will want to do something
+                 * here
+                 */
+                if (!*event)
+                        return;
+        }
+        group->ops->handle_event(group, *event);
+}
+
+static bool needed_by_vfsmount(__u32 test_mask, void *data, int data_is)
+{
+        struct path *path;
+
+        if (data_is == FSNOTIFY_EVENT_PATH)
+                path = (struct path *)data;
+        else if (data_is == FSNOTIFY_EVENT_FILE)
+                path = &((struct file *)data)->f_path;
+        else
+                return false;
+
+        /* hook in this when mnt->mnt_fsnotify_mask is defined */
+        /* return (test_mask & path->mnt->mnt_fsnotify_mask); */
+        return false;
+}
+
 /*
  * This is the main call to fsnotify. The VFS calls into hook specific functions
  * in linux/fsnotify.h. Those functions then in turn call here. Here will call
@@ -148,38 +188,46 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
         /* global tests shouldn't care about events on child only the specific event */
         __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
-        if (list_empty(&fsnotify_inode_groups))
-                return;
+        /* if no fsnotify listeners, nothing to do */
+        if (list_empty(&fsnotify_inode_groups) &&
+            list_empty(&fsnotify_vfsmount_groups))
+                return;
 
-        if (!(test_mask & fsnotify_inode_mask))
-                return;
+        /* if none of the directed listeners or vfsmount listeners care */
+        if (!(test_mask & fsnotify_inode_mask) &&
+            !(test_mask & fsnotify_vfsmount_mask))
+                return;
+
+        /* if this inode's directed listeners don't care and nothing on the vfsmount
+         * listeners list cares, nothing to do */
+        if (!(test_mask & to_tell->i_fsnotify_mask) &&
+            !needed_by_vfsmount(test_mask, data, data_is))
+                return;
 
-        if (!(test_mask & to_tell->i_fsnotify_mask))
-                return;
         /*
         * SRCU!! the groups list is very very much read only and the path is
         * very hot. The VAST majority of events are not going to need to do
         * anything other than walk the list so it's crazy to pre-allocate.
         */
         idx = srcu_read_lock(&fsnotify_grp_srcu);
-        list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
-                if (test_mask & group->mask) {
-                        if (!group->ops->should_send_event(group, to_tell, mask,
-                                                           data, data_is))
-                                continue;
-                        if (!event) {
-                                event = fsnotify_create_event(to_tell, mask, data,
-                                                              data_is, file_name, cookie,
-                                                              GFP_KERNEL);
-                                /* shit, we OOM'd and now we can't tell, maybe
-                                 * someday someone else will want to do something
-                                 * here */
-                                if (!event)
-                                        break;
+
+        if (test_mask & to_tell->i_fsnotify_mask) {
+                list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
+                        if (test_mask & group->mask) {
+                                send_to_group(mask, group, data, data_is,
+                                              file_name, cookie, &event, to_tell);
                         }
-                        group->ops->handle_event(group, event);
                 }
         }
+
+        if (needed_by_vfsmount(test_mask, data, data_is)) {
+                list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) {
+                        if (test_mask & group->mask) {
+                                send_to_group(mask, group, data, data_is,
+                                              file_name, cookie, &event, to_tell);
+                        }
+                }
+        }
         srcu_read_unlock(&fsnotify_grp_srcu, idx);
 
         /*
         * fsnotify_create_event() took a reference so the event can't be cleaned

diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -10,14 +10,20 @@
 extern struct srcu_struct fsnotify_grp_srcu;
 /* all groups which receive inode fsnotify events */
 extern struct list_head fsnotify_inode_groups;
+/* all groups which receive vfsmount fsnotify events */
+extern struct list_head fsnotify_vfsmount_groups;
 /* all bitwise OR of all event types (FS_*) for all fsnotify_inode_groups */
 extern __u32 fsnotify_inode_mask;
+/* all bitwise OR of all event types (FS_*) for all fsnotify_vfsmount_groups */
+extern __u32 fsnotify_vfsmount_mask;
 
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
 /* add a group to the inode group list */
 extern void fsnotify_add_inode_group(struct fsnotify_group *group);
+/* add a group to the vfsmount group list */
+extern void fsnotify_add_vfsmount_group(struct fsnotify_group *group);
 
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);

diff --git a/fs/notify/group.c b/fs/notify/group.c
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -32,10 +32,14 @@
 static DEFINE_MUTEX(fsnotify_grp_mutex);
 /* protects reads while running the fsnotify_groups list */
 struct srcu_struct fsnotify_grp_srcu;
-/* all groups registered to receive filesystem notifications */
+/* all groups registered to receive inode filesystem notifications */
 LIST_HEAD(fsnotify_inode_groups);
+/* all groups registered to receive mount point filesystem notifications */
+LIST_HEAD(fsnotify_vfsmount_groups);
 /* bitwise OR of all events (FS_*) interesting to some group on this system */
 __u32 fsnotify_inode_mask;
+/* bitwise OR of all events (FS_*) interesting to some group on this system */
+__u32 fsnotify_vfsmount_mask;
 
 /*
  * When a new group registers or changes it's set of interesting events
@@ -44,14 +48,20 @@ __u32 fsnotify_inode_mask;
 void fsnotify_recalc_global_mask(void)
 {
         struct fsnotify_group *group;
-        __u32 mask = 0;
+        __u32 inode_mask = 0;
+        __u32 vfsmount_mask = 0;
         int idx;
 
         idx = srcu_read_lock(&fsnotify_grp_srcu);
         list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
-                mask |= group->mask;
+                inode_mask |= group->mask;
+        list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
+                vfsmount_mask |= group->mask;
         srcu_read_unlock(&fsnotify_grp_srcu, idx);
-        fsnotify_inode_mask = mask;
+
+        fsnotify_inode_mask = inode_mask;
+        fsnotify_vfsmount_mask = vfsmount_mask;
 }
 
 /*
@@ -77,6 +87,17 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
         fsnotify_recalc_global_mask();
 }
 
+void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
+{
+        mutex_lock(&fsnotify_grp_mutex);
+
+        if (!group->on_vfsmount_group_list)
+                list_add_tail_rcu(&group->vfsmount_group_list, &fsnotify_vfsmount_groups);
+        group->on_vfsmount_group_list = 1;
+
+        mutex_unlock(&fsnotify_grp_mutex);
+}
+
 void fsnotify_add_inode_group(struct fsnotify_group *group)
 {
         mutex_lock(&fsnotify_grp_mutex);
@@ -132,6 +153,9 @@ static void __fsnotify_evict_group(struct fsnotify_group *group)
         if (group->on_inode_group_list)
                 list_del_rcu(&group->inode_group_list);
         group->on_inode_group_list = 0;
+        if (group->on_vfsmount_group_list)
+                list_del_rcu(&group->vfsmount_group_list);
+        group->on_vfsmount_group_list = 0;
 }
 
 /*
@@ -197,6 +221,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
         group->max_events = UINT_MAX;
 
         INIT_LIST_HEAD(&group->inode_group_list);
+        INIT_LIST_HEAD(&group->vfsmount_group_list);
 
         spin_lock_init(&group->mark_lock);
         INIT_LIST_HEAD(&group->mark_entries);

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -328,6 +328,13 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
         */
         if (unlikely(list_empty(&group->inode_group_list)))
                 fsnotify_add_inode_group(group);
+        /*
+         * XXX This is where we could also do the fsnotify_add_vfsmount_group
+         * if we are setting and vfsmount mark....
+        if (unlikely(list_empty(&group->vfsmount_group_list)))
+                fsnotify_add_vfsmount_group(group);
+        */
+
         /*
         * LOCKING ORDER!!!!

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -99,6 +99,10 @@ struct fsnotify_group {
         * or fsnotify_grp_srcu depending on write vs read.
         */
         struct list_head inode_group_list;
+        /*
+         * same as above except anchored by fsnotify_vfsmount_groups
+         */
+        struct list_head vfsmount_group_list;
 
         /*
         * Defines all of the event types in which this group is interested.
@@ -137,6 +141,7 @@ struct fsnotify_group {
         /* prevents double list_del of group_list. protected by global fsnotify_grp_mutex */
         bool on_inode_group_list;
+        bool on_vfsmount_group_list;
 
         /* groups can define private fields here or use the void *private */
         union {