Automatically register and deregister extra major numbers to get around the 256 minor limit in 2.4. --- diff/drivers/md/dm-ioctl.c 2003-04-17 17:48:46.000000000 +0100 +++ source/drivers/md/dm-ioctl.c 2003-04-17 17:52:19.000000000 +0100 @@ -562,10 +562,9 @@ static int create(struct dm_ioctl *param, struct dm_ioctl *user) { int r; - kdev_t dev; + kdev_t dev = 0; struct dm_table *t; struct mapped_device *md; - unsigned int minor = 0; r = check_name(param->name); if (r) @@ -582,9 +581,9 @@ } if (param->flags & DM_PERSISTENT_DEV_FLAG) - minor = minor(to_kdev_t(param->dev)); + dev = to_kdev_t(param->dev); - r = dm_create(minor, t, &md); + r = dm_create(dev, t, &md); if (r) { dm_table_put(t); return r; --- diff/drivers/md/dm.c 2003-04-17 17:43:53.000000000 +0100 +++ source/drivers/md/dm.c 2003-04-17 17:50:39.000000000 +0100 @@ -12,18 +12,15 @@ #include #include #include +#include #include #include #include static const char *_name = DM_NAME; -#define MAX_DEVICES (1 << MINORBITS) #define DEFAULT_READ_AHEAD 64 -static unsigned int major = 0; -static unsigned int _major = 0; - struct dm_io { struct mapped_device *md; @@ -74,61 +71,276 @@ #define MIN_IOS 256 static kmem_cache_t *_io_cache; -/* block device arrays */ -static int _block_size[MAX_DEVICES]; -static int _blksize_size[MAX_DEVICES]; -static int _hardsect_size[MAX_DEVICES]; - static struct mapped_device *get_kdev(kdev_t dev); static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh); static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb); +/*----------------------------------------------------------------- + * In order to avoid the 256 minor number limit we are going to + * register more major numbers as necessary.
+ *---------------------------------------------------------------*/ +#define MAX_MINORS (1 << MINORBITS) -static __init int local_init(void) +struct major_details { + unsigned int major; + + int transient; + struct list_head transient_list; + + unsigned int first_free_minor; + int nr_free_minors; + + struct mapped_device *mds[MAX_MINORS]; + int blk_size[MAX_MINORS]; + int blksize_size[MAX_MINORS]; + int hardsect_size[MAX_MINORS]; +}; + +static struct rw_semaphore _dev_lock; +static struct major_details *_majors[MAX_BLKDEV]; + +/* + * This holds a list of majors that non-specified device numbers + * may be allocated from. Only majors with free minors appear on + * this list. + */ +static LIST_HEAD(_transients_free); + +static int __alloc_major(unsigned int major, struct major_details **result) { int r; + unsigned int transient = !major; + struct major_details *maj; - /* allocate a slab for the dm_ios */ - _io_cache = kmem_cache_create("dm io", - sizeof(struct dm_io), 0, 0, NULL, NULL); + /* Major already allocated? 
*/ + if (major && _majors[major]) + return 0; - if (!_io_cache) + maj = kmalloc(sizeof(*maj), GFP_KERNEL); + if (!maj) return -ENOMEM; - _major = major; - r = register_blkdev(_major, _name, &dm_blk_dops); + memset(maj, 0, sizeof(*maj)); + INIT_LIST_HEAD(&maj->transient_list); + + maj->nr_free_minors = MAX_MINORS; + + r = register_blkdev(major, _name, &dm_blk_dops); if (r < 0) { - DMERR("register_blkdev failed"); - kmem_cache_destroy(_io_cache); + DMERR("register_blkdev failed for %d", major); + kfree(maj); return r; } + if (r > 0) + major = r; - if (!_major) - _major = r; + maj->major = major; - /* set up the arrays */ - read_ahead[_major] = DEFAULT_READ_AHEAD; - blk_size[_major] = _block_size; - blksize_size[_major] = _blksize_size; - hardsect_size[_major] = _hardsect_size; + if (transient) { + maj->transient = transient; + list_add_tail(&maj->transient_list, &_transients_free); + } - blk_queue_make_request(BLK_DEFAULT_QUEUE(_major), dm_request); + _majors[major] = maj; + blk_size[major] = maj->blk_size; + blksize_size[major] = maj->blksize_size; + hardsect_size[major] = maj->hardsect_size; + read_ahead[major] = DEFAULT_READ_AHEAD; + + blk_queue_make_request(BLK_DEFAULT_QUEUE(major), dm_request); + *result = maj; return 0; } -static void local_exit(void) +static void __free_major(struct major_details *maj) { - kmem_cache_destroy(_io_cache); + unsigned int major = maj->major; - if (unregister_blkdev(_major, _name) < 0) + list_del(&maj->transient_list); + + read_ahead[major] = 0; + blk_size[major] = NULL; + blksize_size[major] = NULL; + hardsect_size[major] = NULL; + + _majors[major] = NULL; + kfree(maj); + + if (unregister_blkdev(major, _name) < 0) DMERR("devfs_unregister_blkdev failed"); +} - read_ahead[_major] = 0; - blk_size[_major] = NULL; - blksize_size[_major] = NULL; - hardsect_size[_major] = NULL; - _major = 0; +static void free_all_majors(void) +{ + unsigned int major = ARRAY_SIZE(_majors); + + down_write(&_dev_lock); + + while (major--) + if 
(_majors[major]) + __free_major(_majors[major]); + + up_write(&_dev_lock); +} + +static void free_dev(kdev_t dev) +{ + unsigned int major = major(dev); + unsigned int minor = minor(dev); + struct major_details *maj; + + down_write(&_dev_lock); + + maj = _majors[major]; + if (!maj) + goto out; + + maj->mds[minor] = NULL; + maj->nr_free_minors++; + + if (maj->nr_free_minors == MAX_MINORS) { + __free_major(maj); + goto out; + } + + if (!maj->transient) + goto out; + + if (maj->nr_free_minors == 1) + list_add_tail(&maj->transient_list, &_transients_free); + + if (minor < maj->first_free_minor) + maj->first_free_minor = minor; + + out: + up_write(&_dev_lock); +} + +static void __alloc_minor(struct major_details *maj, unsigned int minor, + struct mapped_device *md) +{ + maj->mds[minor] = md; + md->dev = mk_kdev(maj->major, minor); + maj->nr_free_minors--; + + if (maj->transient && !maj->nr_free_minors) + list_del_init(&maj->transient_list); +} + +/* + * See if requested kdev_t is available. + */ +static int specific_dev(kdev_t dev, struct mapped_device *md) +{ + int r = 0; + unsigned int major = major(dev); + unsigned int minor = minor(dev); + struct major_details *maj; + + if (!major || (major >= MAX_BLKDEV) || (minor >= MAX_MINORS)) { /* _majors[] has MAX_BLKDEV slots */ + DMWARN("device number requested out of range (%d, %d)", + major, minor); + return -EINVAL; + } + + down_write(&_dev_lock); + maj = _majors[major]; + + /* Register requested major? */ + if (!maj) { + r = __alloc_major(major, &maj); + if (r) + goto out; + + major = maj->major; + } + + if (maj->mds[minor]) { + r = -EBUSY; + goto out; + } + + __alloc_minor(maj, minor, md); + + out: + up_write(&_dev_lock); + return r; +} + +/* + * Find first unused device number, requesting a new major number + * if required.
+ */ +static int first_free_dev(struct mapped_device *md) +{ + int r = 0; + struct major_details *maj; + + down_write(&_dev_lock); + + if (!list_empty(&_transients_free)) + maj = list_entry(_transients_free.next, struct major_details, + transient_list); + + else { + r = __alloc_major(0, &maj); + if (r) + goto out; + } + + while (maj->mds[maj->first_free_minor++]) + ; + + __alloc_minor(maj, maj->first_free_minor - 1, md);/* FIXME ? */ + + out: + up_write(&_dev_lock); + return r; +} + +static struct mapped_device *get_kdev(kdev_t dev) +{ + struct mapped_device *md; + struct major_details *maj; + + down_read(&_dev_lock); + maj = _majors[major(dev)]; + if (!maj) { + md = NULL; + goto out; + } + md = maj->mds[minor(dev)]; + if (md) + dm_get(md); + + out: + up_read(&_dev_lock); + return md; +} + +/*----------------------------------------------------------------- + * init/exit code + *---------------------------------------------------------------*/ + +static __init int local_init(void) +{ + init_rwsem(&_dev_lock); + + /* allocate a slab for the dm_ios */ + _io_cache = kmem_cache_create("dm io", + sizeof(struct dm_io), 0, 0, NULL, NULL); + + if (!_io_cache) + return -ENOMEM; + + return 0; +} + +static void local_exit(void) +{ + kmem_cache_destroy(_io_cache); + free_all_majors(); DMINFO("cleaned up"); } @@ -167,7 +379,7 @@ return 0; - bad: + bad: while (i--) _inits[i].exit(); @@ -227,18 +439,15 @@ } /* In 512-byte units */ -#define VOLUME_SIZE(minor) (_block_size[(minor)] << 1) +#define VOLUME_SIZE(dev) (blk_size[major(dev)][minor(dev)] << 1) /* FIXME: check this */ static int dm_blk_ioctl(struct inode *inode, struct file *file, - uint command, unsigned long a) + unsigned int command, unsigned long a) { - int minor = MINOR(inode->i_rdev); + kdev_t dev = inode->i_rdev; long size; - if (minor >= MAX_DEVICES) - return -ENXIO; - switch (command) { case BLKROSET: case BLKROGET: @@ -256,13 +465,13 @@ break; case BLKGETSIZE: - size = VOLUME_SIZE(minor); + size = 
VOLUME_SIZE(dev); if (copy_to_user((void *) a, &size, sizeof(long))) return -EFAULT; break; case BLKGETSIZE64: - size = VOLUME_SIZE(minor); + size = VOLUME_SIZE(dev); if (put_user((u64) ((u64) size) << 9, (u64 *) a)) return -EFAULT; break; @@ -445,10 +654,12 @@ static int check_dev_size(kdev_t dev, unsigned long block) { + unsigned int major = major(dev); + unsigned int minor = minor(dev); + /* FIXME: check this */ - int minor = MINOR(dev); - unsigned long max_sector = (_block_size[minor] << 1) + 1; - unsigned long sector = (block + 1) * (_blksize_size[minor] >> 9); + unsigned long max_sector = (blk_size[major][minor] << 1) + 1; + unsigned long sector = (block + 1) * (blksize_size[major][minor] >> 9); return (sector > max_sector) ? 0 : 1; } @@ -476,7 +687,7 @@ memset(&bh, 0, sizeof(bh)); bh.b_blocknr = block; bh.b_dev = bh.b_rdev = dev; - bh.b_size = _blksize_size[MINOR(dev)]; + bh.b_size = blksize_size[major(dev)][minor(dev)]; bh.b_rsector = block * (bh.b_size >> 9); /* find target */ @@ -523,79 +734,20 @@ return r; } -/*----------------------------------------------------------------- - * A bitset is used to keep track of allocated minor numbers. - *---------------------------------------------------------------*/ -static spinlock_t _minor_lock = SPIN_LOCK_UNLOCKED; -static struct mapped_device *_mds[MAX_DEVICES]; - -static void free_minor(unsigned int minor) -{ - spin_lock(&_minor_lock); - _mds[minor] = NULL; - spin_unlock(&_minor_lock); -} - /* - * See if the device with a specific minor # is free. + * Free a mapped device: release its device number, io pool and memory.
*/ -static int specific_minor(struct mapped_device *md, unsigned int minor) -{ - int r = -EBUSY; - - if (minor >= MAX_DEVICES) { - DMWARN("request for a mapped_device beyond MAX_DEVICES (%d)", - MAX_DEVICES); - return -EINVAL; - } - - spin_lock(&_minor_lock); - if (!_mds[minor]) { - _mds[minor] = md; - r = 0; - } - spin_unlock(&_minor_lock); - - return r; -} - -static int next_free_minor(struct mapped_device *md, unsigned int *minor) -{ - int i; - - spin_lock(&_minor_lock); - for (i = 0; i < MAX_DEVICES; i++) { - if (!_mds[i]) { - _mds[i] = md; - *minor = i; - break; - } - } - spin_unlock(&_minor_lock); - - return (i < MAX_DEVICES) ? 0 : -EBUSY; -} - -static struct mapped_device *get_kdev(kdev_t dev) +static void free_md(struct mapped_device *md) { - struct mapped_device *md; - - if (major(dev) != _major) - return NULL; - - spin_lock(&_minor_lock); - md = _mds[minor(dev)]; - if (md) - dm_get(md); - spin_unlock(&_minor_lock); - - return md; + free_dev(md->dev); + mempool_destroy(md->io_pool); + kfree(md); } /* * Allocate and initialise a blank device with a given minor. 
*/ -static struct mapped_device *alloc_dev(unsigned int minor) +static struct mapped_device *alloc_md(kdev_t dev) { int r; struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); @@ -605,13 +757,13 @@ return NULL; } - /* get a minor number for the dev */ - if (minor) - r = specific_minor(md, minor); - else - r = next_free_minor(md, &minor); + /* Allocate suitable device number */ + if (!dev) + r = first_free_dev(md); + else + r = specific_dev(dev, md); - if (r < 0) { + if (r) { kfree(md); return NULL; } @@ -621,12 +773,11 @@ md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab, mempool_free_slab, _io_cache); if (!md->io_pool) { - free_minor(minor); + free_dev(md->dev); /* not free_md(): io_pool is NULL and md is kfree'd just below */ kfree(md); return NULL; } - md->dev = mk_kdev(_major, minor); init_rwsem(&md->lock); atomic_set(&md->holders, 1); atomic_set(&md->pending, 0); @@ -635,13 +786,6 @@ return md; } -static void free_dev(struct mapped_device *md) -{ - free_minor(minor(md->dev)); - mempool_destroy(md->io_pool); - kfree(md); -} - /* * The hardsect size for a mapped device is the largest hardsect size * from the devices it maps onto.
@@ -666,14 +810,16 @@ */ static int __bind(struct mapped_device *md, struct dm_table *t) { - int minor = minor(md->dev); + unsigned int minor = minor(md->dev); + unsigned int major = major(md->dev); md->map = t; /* in k */ - _block_size[minor] = dm_table_get_size(t) >> 1; - _blksize_size[minor] = BLOCK_SIZE; - _hardsect_size[minor] = __find_hardsect_size(dm_table_get_devices(t)); - register_disk(NULL, md->dev, 1, &dm_blk_dops, _block_size[minor]); + blk_size[major][minor] = dm_table_get_size(t) >> 1; + blksize_size[major][minor] = BLOCK_SIZE; + hardsect_size[major][minor] = + __find_hardsect_size(dm_table_get_devices(t)); + register_disk(NULL, md->dev, 1, &dm_blk_dops, blk_size[major][minor]); dm_table_get(t); return 0; @@ -681,32 +827,32 @@ static void __unbind(struct mapped_device *md) { - int minor = minor(md->dev); + unsigned int minor = minor(md->dev); + unsigned int major = major(md->dev); dm_table_put(md->map); md->map = NULL; - _block_size[minor] = 0; - _blksize_size[minor] = 0; - _hardsect_size[minor] = 0; + blk_size[major][minor] = 0; + blksize_size[major][minor] = 0; + hardsect_size[major][minor] = 0; } /* * Constructor for a new device. 
*/ -int dm_create(unsigned int minor, struct dm_table *table, - struct mapped_device **result) +int dm_create(kdev_t dev, struct dm_table *table, struct mapped_device **result) { int r; struct mapped_device *md; - md = alloc_dev(minor); + md = alloc_md(dev); if (!md) return -ENXIO; r = __bind(md, table); if (r) { - free_dev(md); + free_md(md); return r; } @@ -723,7 +869,7 @@ { if (atomic_dec_and_test(&md->holders)) { __unbind(md); - free_dev(md); + free_md(md); } } @@ -880,8 +1026,6 @@ module_init(dm_init); module_exit(dm_exit); -MODULE_PARM(major, "i"); -MODULE_PARM_DESC(major, "The major number of the device mapper"); MODULE_DESCRIPTION(DM_NAME " driver"); MODULE_AUTHOR("Joe Thornber "); MODULE_LICENSE("GPL"); --- diff/drivers/md/dm.h 2003-04-17 17:47:29.000000000 +0100 +++ source/drivers/md/dm.h 2003-04-17 17:50:39.000000000 +0100 @@ -53,8 +53,7 @@ * Functions for manipulating a struct mapped_device. * Drop the reference with dm_put when you finish with the object. *---------------------------------------------------------------*/ -int dm_create(unsigned int minor, struct dm_table *table, - struct mapped_device **md); +int dm_create(kdev_t dev, struct dm_table *table, struct mapped_device **md); /* * Reference counting for md.