Hello all, The current version of the VFS locking patch adds a new semaphore to fs/super.c. This is used to make sure a filesystem does not get mounted on a logical volume while a snapshot is being taken. It also results in all mounts on the system being serialized, and isn't in line with the VFS locking scheme in general. I've been meaning to fix it forever, here's an updated version that adds a super with s->s_dev set to the source volume if nothing is currently mounted on the source volume. This allows me to use the s_umount semaphore in the super block to keep things safe, which is cleaner overall. The other benefit over the existing patch is this one has zero footprint outside the lockfs calls. You're only running new code if you take a snapshot. I've done some testing here, but wanted to let LVM people review it before going further. Patch is below against 2.4.21-rc6. This provides zero new functionality over the existing VFS locking patch, and is experimental. Do not apply this on production servers, and do not apply unless you want to help test. -chris ===== drivers/md/lvm.c 1.19 vs edited ===== --- diff/drivers/md/dm-snapshot.c 2003-10-16 10:44:23.000000000 +0100 +++ source/drivers/md/dm-snapshot.c 2003-10-16 10:44:38.000000000 +0100 @@ -525,7 +525,7 @@ } /* Flush IO to the origin device */ - fsync_dev(s->origin->dev); + fsync_dev_lockfs(s->origin->dev); /* Add snapshot to the list of snapshots for this origin */ if (register_snapshot(s)) { @@ -539,6 +539,7 @@ bad6: kcopyd_client_destroy(s->kcopyd_client); + unlockfs(s->origin->dev); bad5: s->store.destroy(&s->store); --- diff/drivers/md/lvm.c 2003-10-10 23:39:06.000000000 +0100 +++ source/drivers/md/lvm.c 2003-10-16 10:44:38.000000000 +0100 @@ -236,9 +236,6 @@ #define DEVICE_OFF(device) #define LOCAL_END_REQUEST -/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ -/* #define LVM_VFS_ENHANCEMENT */ - #include #include #include @@ -2250,12 +2247,8 @@ if (lv_ptr->lv_access & LV_SNAPSHOT) { lv_t *org = lv_ptr->lv_snapshot_org, *last; - /* sync the original logical volume */ - fsync_dev(org->lv_dev); -#ifdef LVM_VFS_ENHANCEMENT /* VFS function call to sync and lock the filesystem */ fsync_dev_lockfs(org->lv_dev); -#endif down_write(&org->lv_lock); org->lv_access |= LV_SNAPSHOT_ORG; @@ -2281,11 +2274,9 @@ else set_device_ro(lv_ptr->lv_dev, 1); -#ifdef LVM_VFS_ENHANCEMENT /* VFS function call to unlock the filesystem */ if (lv_ptr->lv_access & LV_SNAPSHOT) unlockfs(lv_ptr->lv_snapshot_org->lv_dev); -#endif lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = lvm_fs_create_lv(vg_ptr, lv_ptr); --- diff/fs/buffer.c 2003-10-16 10:44:23.000000000 +0100 +++ source/fs/buffer.c 2003-10-16 10:44:38.000000000 +0100 @@ -383,6 +383,34 @@ fsync_dev(dev); } +int fsync_dev_lockfs(kdev_t dev) +{ + /* you are not allowed to try locking all the filesystems + ** on the system, your chances of getting through without + ** total deadlock are slim to none. + */ + if (!dev) + return fsync_dev(dev) ; + + sync_buffers(dev, 0); + + lock_kernel(); + /* note, the FS might need to start transactions to + ** sync the inodes, or the quota, no locking until + ** after these are done + */ + sync_inodes(dev); + DQUOT_SYNC(dev); + /* if inodes or quotas could be dirtied during the + ** sync_supers_lockfs call, the FS is responsible for getting + ** them on disk, without deadlocking against the lock + */ + sync_supers_lockfs(dev) ; + unlock_kernel(); + + return sync_buffers(dev, 1) ; +} + asmlinkage long sys_sync(void) { fsync_dev(0); --- diff/fs/reiserfs/super.c 2003-08-26 13:50:12.000000000 +0100 +++ source/fs/reiserfs/super.c 2003-10-16 10:44:38.000000000 +0100 @@ -73,7 +73,7 @@ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); reiserfs_block_writes(&th) ; - journal_end(&th, s, 1) ; + journal_end_sync(&th, s, 1) ; } s->s_dirt = dirty; unlock_kernel() ; --- diff/fs/super.c 2003-08-26 13:50:12.000000000 +0100 +++ source/fs/super.c 2003-10-16 10:44:38.000000000 +0100 @@ -39,6 +39,12 @@ spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; /* + * stub of a filesystem used to make sure an FS isn't mounted + * in the middle of a lockfs call + */ +static DECLARE_FSTYPE_DEV(lockfs_fs_type, "lockfs", NULL); + +/* * Handling of filesystem drivers list. * Rules: * Inclusion to/removals from/scanning of list are protected by spinlock. @@ -436,6 +442,25 @@ put_super(sb); } +static void write_super_lockfs(struct super_block *sb) +{ + lock_super(sb); + if (sb->s_root && sb->s_op) { + if (sb->s_dirt && sb->s_op->write_super) + sb->s_op->write_super(sb); + if (sb->s_op->write_super_lockfs) + sb->s_op->write_super_lockfs(sb); + } + unlock_super(sb); + + /* + * if no lockfs call is provided, use the sync_fs call instead. + * this must be done without the super lock held + */ + if (!sb->s_op->write_super_lockfs && sb->s_op->sync_fs) + sb->s_op->sync_fs(sb); +} + static inline void write_super(struct super_block *sb) { lock_super(sb); @@ -483,6 +508,119 @@ spin_unlock(&sb_lock); } +static struct super_block *find_super_for_lockfs(kdev_t dev) +{ + struct super_block *lockfs_sb = alloc_super(); + struct super_block * s; + + if (!dev) + return NULL; +restart: + spin_lock(&sb_lock); + s = find_super(dev); + if (s) { + spin_unlock(&sb_lock); + down_read(&s->s_umount); + if (s->s_root) { + destroy_super(lockfs_sb); + return s; + } + drop_super(s); + goto restart; + } + /* if (s) we either return or goto, so we know s == NULL here. + * At this point, there are no mounted filesystems on this device, + * so we pretend to mount one. + */ + if (!lockfs_sb) { + spin_unlock(&sb_lock); + return NULL; + } + s = lockfs_sb; + s->s_dev = dev; + if (lockfs_fs_type.fs_supers.prev == NULL) + INIT_LIST_HEAD(&lockfs_fs_type.fs_supers); + insert_super(s, &lockfs_fs_type); + s->s_root = (struct dentry *)1; + /* alloc_super gives us a write lock on s_umount, this + * way we know there are no concurrent lockfs holders for this dev. + * It allows us to remove the temp super from the list of supers + * immediately when unlockfs is called + */ + return s; +} +/* + * Note: don't check the dirty flag before waiting, we want the lock + * to happen every time this is called. dev must be non-zero + */ +void sync_supers_lockfs(kdev_t dev) +{ + struct super_block *sb; + sb = find_super_for_lockfs(dev); + if (sb) { + write_super_lockfs(sb); + /* the drop_super is done by unlockfs */ + } +} + +static void drop_super_lockfs(struct super_block *s) +{ + if (s->s_type == &lockfs_fs_type) { + struct file_system_type *fs = s->s_type; + + /* + * nobody else is allowed to grab_super() on our temp + */ + if (!deactivate_super(s)) + BUG(); + + spin_lock(&sb_lock); + s->s_root = NULL; + list_del(&s->s_list); + list_del(&s->s_instances); + spin_unlock(&sb_lock); + + up_write(&s->s_umount); + put_super(s); + put_filesystem(fs); + } else + drop_super(s); +} + +void unlockfs(kdev_t dev) +{ + struct super_block *s; + if (!dev) + return; + + spin_lock(&sb_lock); + s = find_super(dev); + if (s) { + /* + * find_super and the original lockfs call both incremented + * the reference count. drop one of them + */ + s->s_count--; + spin_unlock(&sb_lock); + if (s->s_root) { + if (s->s_op->unlockfs) + s->s_op->unlockfs(s); + drop_super_lockfs(s); + goto out; + } else { + printk("unlockfs: no s_root, dev %s\n", kdevname(dev)); + BUG(); + } + } else { + printk("unlockfs: no super found, dev %s\n", kdevname(dev)); + BUG(); + } + + spin_unlock(&sb_lock); +out: + return; +} + /** * get_super - get the superblock of a device * @dev: device to get the superblock for --- diff/include/linux/fs.h 2003-10-16 10:44:23.000000000 +0100 +++ source/include/linux/fs.h 2003-10-16 10:44:38.000000000 +0100 @@ -1273,6 +1273,7 @@ extern int sync_buffers(kdev_t, int); extern void sync_dev(kdev_t); extern int fsync_dev(kdev_t); +extern int fsync_dev_lockfs(kdev_t); extern int fsync_super(struct super_block *); extern int fsync_no_super(kdev_t); extern void sync_inodes_sb(struct super_block *); @@ -1290,6 +1291,8 @@ extern int filemap_fdatasync(struct address_space *); extern int filemap_fdatawait(struct address_space *); extern void sync_supers(kdev_t dev, int wait); +extern void sync_supers_lockfs(kdev_t); +extern void unlockfs(kdev_t); extern int bmap(struct inode *, int); extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int); --- diff/kernel/ksyms.c 2003-10-16 10:44:23.000000000 +0100 +++ source/kernel/ksyms.c 2003-10-16 10:44:38.000000000 +0100 @@ -189,6 +189,8 @@ EXPORT_SYMBOL(invalidate_inode_pages); EXPORT_SYMBOL(truncate_inode_pages); EXPORT_SYMBOL(fsync_dev); +EXPORT_SYMBOL(fsync_dev_lockfs); +EXPORT_SYMBOL(unlockfs); EXPORT_SYMBOL(fsync_no_super); EXPORT_SYMBOL(permission); EXPORT_SYMBOL(vfs_permission);