New VFS-lock patch (from 4/1/2004). - Replaces patch 8 from 2.6.5-rc1-udm1. --- diff/fs/block_dev.c 2004-04-06 15:54:23.025006488 +0100 +++ source/fs/block_dev.c 2004-04-06 15:54:25.088692760 +0100 @@ -251,6 +251,7 @@ static void init_once(void * foo, kmem_c { memset(bdev, 0, sizeof(*bdev)); sema_init(&bdev->bd_sem, 1); + sema_init(&bdev->bd_mount_sem, 1); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); inode_init_once(&ei->vfs_inode); --- diff/fs/buffer.c 2004-04-06 15:54:23.027006184 +0100 +++ source/fs/buffer.c 2004-04-06 15:54:25.089692608 +0100 @@ -264,6 +264,73 @@ int fsync_bdev(struct block_device *bdev } /* + * triggered by the device mapper code to lock a filesystem and force + * it into a consistent state. + * + * This takes the block device bd_mount_sem to make sure no new mounts + * happen on bdev until thaw_bdev is called. If a super is found on this + * block device, we hold a read lock on the s->s_umount sem to make sure + * nobody unmounts until the snapshot creation is done + */ +struct super_block *freeze_bdev(struct block_device *bdev) +{ + struct super_block *sb; + + if (!bdev) + return NULL; + down(&bdev->bd_mount_sem); + sb = get_super(bdev); + if (sb && !(sb->s_flags & MS_RDONLY)) { + sb->s_frozen = SB_FREEZE_WRITE; + wmb(); + + sync_inodes_sb(sb, 0); + DQUOT_SYNC(sb); + + sb->s_frozen = SB_FREEZE_TRANS; + wmb(); + + lock_super(sb); + if (sb->s_dirt && sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + + sync_blockdev(sb->s_bdev); + sync_inodes_sb(sb, 1); + sync_blockdev(sb->s_bdev); + + if (sb->s_op->write_super_lockfs) + sb->s_op->write_super_lockfs(sb); + } + + sync_blockdev(bdev); + return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ +} +EXPORT_SYMBOL(freeze_bdev); + +void thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + if (!bdev) + return; + if (sb) { + BUG_ON(sb->s_bdev != bdev); + + if (sb->s_op->unlockfs) + 
sb->s_op->unlockfs(sb); + sb->s_frozen = SB_UNFROZEN; + wmb(); + wake_up(&sb->s_wait_unfrozen); + drop_super(sb); + } + + up(&bdev->bd_mount_sem); +} +EXPORT_SYMBOL(thaw_bdev); + +/* * sync everything. Start out by waking pdflush, because that writes back * all queues in parallel. */ --- diff/fs/super.c 2004-04-06 15:54:23.028006032 +0100 +++ source/fs/super.c 2004-04-06 15:54:25.090692456 +0100 @@ -78,6 +78,7 @@ static struct super_block *alloc_super(v sema_init(&s->s_dquot.dqio_sem, 1); sema_init(&s->s_dquot.dqonoff_sem, 1); init_rwsem(&s->s_dquot.dqptr_sem); + init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; s->dq_op = sb_dquot_ops; s->s_qcop = sb_quotactl_ops; @@ -622,7 +623,14 @@ struct super_block *get_sb_bdev(struct f if (IS_ERR(bdev)) return (struct super_block *)bdev; + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + down(&bdev->bd_mount_sem); s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); + up(&bdev->bd_mount_sem); if (IS_ERR(s)) goto out; --- diff/include/linux/buffer_head.h 2004-04-06 15:54:23.029005880 +0100 +++ source/include/linux/buffer_head.h 2004-04-06 15:54:25.090692456 +0100 @@ -164,6 +164,8 @@ void __wait_on_buffer(struct buffer_head wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); void wake_up_buffer(struct buffer_head *bh); int fsync_bdev(struct block_device *); +struct super_block *freeze_bdev(struct block_device *); +void thaw_bdev(struct block_device *, struct super_block *); int fsync_super(struct super_block *); int fsync_no_super(struct block_device *); struct buffer_head *__find_get_block(struct block_device *, sector_t, int); --- diff/include/linux/fs.h 2004-04-06 15:54:23.030005728 +0100 +++ source/include/linux/fs.h 2004-04-06 15:54:25.091692304 +0100 @@ -345,6 +345,7 @@ struct block_device { struct inode * bd_inode; /* will die */ int bd_openers; struct semaphore bd_sem; /* 
open/close mutex */ + struct semaphore bd_mount_sem; /* mount mutex */ struct list_head bd_inodes; void * bd_holder; int bd_holders; @@ -731,6 +732,9 @@ struct super_block { struct list_head s_instances; struct quota_info s_dquot; /* Diskquota specific options */ + int s_frozen; + wait_queue_head_t s_wait_unfrozen; + char s_id[32]; /* Informational name */ struct kobject kobj; /* anchor for sysfs */ @@ -744,6 +748,18 @@ struct super_block { }; /* + * Snapshotting support. + */ +enum { + SB_UNFROZEN = 0, + SB_FREEZE_WRITE = 1, + SB_FREEZE_TRANS = 2, +}; + +#define vfs_check_frozen(sb, level) \ + wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) + +/* * Superblock locking. */ static inline void lock_super(struct super_block * sb) --- diff/mm/filemap.c 2004-04-06 15:53:43.095076760 +0100 +++ source/mm/filemap.c 2004-04-06 15:54:25.092692152 +0100 @@ -1785,6 +1785,8 @@ generic_file_aio_write_nolock(struct kio unsigned long seg; char __user *buf; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + ocount = 0; for (seg = 0; seg < nr_segs; seg++) { const struct iovec *iv = &iov[seg];