From d8153d4d8b7b6141770e1416c4a338161205ed1b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:45 +0200 Subject: inotify, fanotify: replace fsnotify_put_group() with fsnotify_destroy_group() Currently in fsnotify_put_group() the ref count of a group is decremented and if it becomes 0 fsnotify_destroy_group() is called. Since a groups ref count is only at group creation set to 1 and never increased after that a call to fsnotify_put_group() always results in a call to fsnotify_destroy_group(). With this patch fsnotify_destroy_group() is called directly. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 63d966d5c2e..d2ad345bdee 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -364,7 +364,8 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); - +/* destroy group */ +extern void fsnotify_destroy_group(struct fsnotify_group *group); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -- cgit v1.2.3 From 986129520479d689962a42c31acdeaf854ac91f5 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:46 +0200 Subject: fsnotify: introduce fsnotify_get_group() Introduce fsnotify_get_group() which increments the reference counter of a group. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d2ad345bdee..e76cef75295 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -360,8 +360,10 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode /* called from fsnotify listeners, such as fanotify or dnotify */ -/* get a reference to an existing or create a new group */ +/* create a new group */ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); +/* get reference to a group */ +extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); /* destroy group */ -- cgit v1.2.3 From 986ab09807ca9454c3f54aae4db7e1bb00daeed3 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:50 +0200 Subject: fsnotify: use a mutex instead of a spinlock to protect a groups mark list Replaces the groups mark_lock spinlock with a mutex. Using a mutex instead of a spinlock results in more flexibility (i.e it allows to sleep while the lock is held). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e76cef75295..c5848346840 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -141,7 +141,7 @@ struct fsnotify_group { unsigned int priority; /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ - spinlock_t mark_lock; /* protect marks_list */ + struct mutex mark_mutex; /* protect marks_list */ atomic_t num_marks; /* 1 for each mark and 1 for not being * past the point of no return when freeing * a group */ -- cgit v1.2.3 From e2a29943e9a2ee2aa737a77f550f46ba72269db4 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:51 +0200 Subject: fsnotify: pass group to fsnotify_destroy_mark() In fsnotify_destroy_mark() dont get the group from the passed mark anymore, but pass the group itself as an additional parameter to the function. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c5848346840..140b4b8a100 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -408,8 +408,9 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); -/* given a mark, flag it to be freed when all references are dropped */ -extern void fsnotify_destroy_mark(struct fsnotify_mark *mark); +/* given a group and a mark, flag mark to be freed when all references are dropped */ +extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group); /* run all the marks in a group, and clear all of the vfsmount marks */ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ -- cgit v1.2.3 From d5a335b845792d2a69ed1e244c0b233117b7db3c Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:52 +0200 Subject: fsnotify: introduce locked versions of fsnotify_add_mark() and fsnotify_remove_mark() This patch introduces fsnotify_add_mark_locked() and fsnotify_remove_mark_locked() which are essentially the same as fsnotify_add_mark() and fsnotify_remove_mark() but assume that the caller has already taken the groups mark mutex. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 140b4b8a100..26c06afa264 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -408,9 +408,13 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); +extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group, + struct inode *inode, struct vfsmount *mnt, int allow_dups); /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); +extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, + struct fsnotify_group *group); /* run all the marks in a group, and clear all of the vfsmount marks */ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ -- cgit v1.2.3 From 64c20d2a20fce295c260ea6cb3b468edfa2fb07b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 14 Jun 2011 17:29:53 +0200 Subject: fsnotify: dont put marks on temporary list when clearing marks by group In clear_marks_by_group_flags() the mark list of a group is iterated and the marks are put on a temporary list. Since we introduced fsnotify_destroy_mark_locked() we dont need the temp list any more and are able to remove the marks while the mark list is iterated and the mark list mutex is held. Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 26c06afa264..5a889935045 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -287,7 +287,6 @@ struct fsnotify_mark { struct fsnotify_inode_mark i; struct fsnotify_vfsmount_mark m; }; - struct list_head free_g_list; /* tmp list used when freeing this mark */ __u32 ignored_mask; /* events types to ignore */ #define FSNOTIFY_MARK_FLAG_INODE 0x01 #define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 -- cgit v1.2.3 From 6960b0d909cde5bdff49e4e5c1250edd10be7ebd Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 12 Aug 2011 01:13:31 +0200 Subject: fsnotify: change locking order On Mon, Aug 01, 2011 at 04:38:22PM -0400, Eric Paris wrote: > > I finally built and tested a v3.0 kernel with these patches (I know I'm > SOOOOOO far behind). Not what I hoped for: > > > [ 150.937798] VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds. Have a nice day... > > [ 150.945290] BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 > > [ 150.946012] IP: [] shmem_free_inode+0x18/0x50 > > [ 150.946012] PGD 2bf9e067 PUD 2bf9f067 PMD 0 > > [ 150.946012] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > > [ 150.946012] CPU 0 > > [ 150.946012] Modules linked in: nfs lockd fscache auth_rpcgss nfs_acl sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ext4 jbd2 crc16 joydev ata_piix i2c_piix4 pcspkr uinput ipv6 autofs4 usbhid [last unloaded: scsi_wait_scan] > > [ 150.946012] > > [ 150.946012] Pid: 2764, comm: syscall_thrash Not tainted 3.0.0+ #1 Red Hat KVM > > [ 150.946012] RIP: 0010:[] [] shmem_free_inode+0x18/0x50 > > [ 150.946012] RSP: 0018:ffff88002c2e5df8 EFLAGS: 00010282 > > [ 150.946012] RAX: 000000004e370d9f RBX: 0000000000000000 RCX: ffff88003a029438 > > [ 150.946012] RDX: 0000000033630a5f RSI: 0000000000000000 RDI: ffff88003491c240 > > [ 150.946012] RBP: ffff88002c2e5e08 R08: 0000000000000000 R09: 0000000000000000 > > [ 150.946012] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a029428 > > [ 150.946012] R13: ffff88003a029428 R14: ffff88003a029428 R15: ffff88003499a610 > > [ 150.946012] FS: 00007f5a05420700(0000) GS:ffff88003f600000(0000) knlGS:0000000000000000 > > [ 150.946012] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > [ 150.946012] CR2: 0000000000000070 CR3: 000000002a662000 CR4: 00000000000006f0 > > [ 150.946012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > [ 150.946012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > > [ 150.946012] Process syscall_thrash (pid: 2764, threadinfo ffff88002c2e4000, task ffff88002bfbc760) > > [ 150.946012] Stack: > > [ 150.946012] ffff88003a029438 ffff88003a029428 ffff88002c2e5e38 ffffffff81102f76 > > [ 150.946012] ffff88003a029438 ffff88003a029598 ffffffff8160f9c0 ffff88002c221250 > > [ 150.946012] ffff88002c2e5e68 ffffffff8115e9be ffff88002c2e5e68 ffff88003a029438 > > [ 150.946012] Call Trace: > > [ 150.946012] [] shmem_evict_inode+0x76/0x130 > > [ 150.946012] [] evict+0x7e/0x170 > > [ 150.946012] [] iput_final+0xd0/0x190 > > [ 150.946012] [] iput+0x33/0x40 > > [ 150.946012] [] fsnotify_destroy_mark_locked+0x145/0x160 > > [ 150.946012] [] fsnotify_destroy_mark+0x36/0x50 > > [ 150.946012] [] sys_inotify_rm_watch+0x77/0xd0 > > [ 150.946012] [] system_call_fastpath+0x16/0x1b > > [ 150.946012] Code: 67 4a 00 b8 e4 ff ff ff eb aa 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 48 8b 9f 40 05 00 00 > > [ 150.946012] 83 7b 70 00 74 1c 4c 8d a3 80 00 00 00 4c 89 e7 e8 d2 5d 4a > > [ 150.946012] RIP [] shmem_free_inode+0x18/0x50 > > [ 150.946012] RSP > > [ 150.946012] CR2: 0000000000000070 > > Looks at aweful lot like the problem from: > http://www.spinics.net/lists/linux-fsdevel/msg46101.html > I tried to reproduce this bug with your test program, but without success. However, if I understand correctly, this occurs since we dont hold any locks when we call iput() in mark_destroy(), right? With the patches you tested, iput() is also not called within any lock, since the groups mark_mutex is released temporarily before iput() is called. This is, since the original codes behaviour is similar. However since we now have a mutex as the biggest lock, we can do what you suggested (http://www.spinics.net/lists/linux-fsdevel/msg46107.html) and call iput() with the mutex held to avoid the race. The patch below implements this. It uses nested locking to avoid deadlock in case we do the final iput() on an inode which still holds marks and thus would take the mutex again when calling fsnotify_inode_delete() in destroy_inode(). Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5a889935045..1af2f6a722c 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -88,9 +88,10 @@ struct fsnotify_event_private_data; * if the group is interested in this event. * handle_event - main call for a group to handle an fs event * free_group_priv - called when a group refcnt hits 0 to clean up the private union - * freeing-mark - this means that a mark has been flagged to die when everything - * finishes using it. The function is supplied with what must be a - * valid group and inode to use to clean up. + * freeing_mark - called when a mark is being destroyed for some reason. The group + * MUST be holding a reference on each mark and that reference must be + * dropped in this function. inotify uses this function to send + * userspace messages that marks have been removed. */ struct fsnotify_ops { bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, -- cgit v1.2.3 From 0a6b6bd5919a65030b557ec8fe81f6fb3e93744a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 14 Oct 2011 17:43:39 -0400 Subject: fsnotify: make fasync generic for both inotify and fanotify inotify is supposed to support async signal notification when information is available on the inotify fd. This patch moves that support to generic fsnotify functions so it can be used by all notification mechanisms. Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1af2f6a722c..d5b0910d496 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -148,6 +148,8 @@ struct fsnotify_group { * a group */ struct list_head marks_list; /* all inode marks for this group */ + struct fasync_struct *fsn_fa; /* async notification */ + /* groups can define private fields here or use the void *private */ union { void *private; @@ -156,7 +158,6 @@ struct fsnotify_group { spinlock_t idr_lock; struct idr idr; u32 last_wd; - struct fasync_struct *fa; /* async notification */ struct user_struct *user; } inotify_data; #endif @@ -368,6 +369,8 @@ extern void fsnotify_get_group(struct fsnotify_group *group); extern void fsnotify_put_group(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); +/* fasync handler function */ +extern int fsnotify_fasync(int fd, struct file *file, int on); /* take a reference to an event */ extern void fsnotify_get_event(struct fsnotify_event *event); extern void fsnotify_put_event(struct fsnotify_event *event); -- cgit v1.2.3