This implements a loopback target for device mapper allowing a regular file to be treated as a block device. Signed-off-by: Bryn Reeves drivers/md/dm-loop.c | 648 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 648 insertions(+) Index: linux-2.6.19/drivers/md/dm-loop.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.19/drivers/md/dm-loop.c 2006-12-06 20:49:43.000000000 +0000 @@ -0,0 +1,648 @@ +/* + * Copyright (C) 2006 Red Hat, Inc. All rights reserved. + * + * This file is part of device-mapper. + * + * Extent mapping implementation heavily influenced by mm/swapfile.c + * + * This file is released under the GPL. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dm.h" +#include "dm-bio-list.h" +#include "dm-bio-record.h" + +#define DM_MSG_PREFIX "loop" +#define LOOP_MAX_EXTENTS 1024 + +#define DMLOOP_READONLY 0x01 +#define DMLOOP_SYNC 0x02 + +typedef enum { + DMLOOP_DEV +} dm_extent_t; + +struct extent { + sector_t start; + sector_t len; + dm_extent_t type; + u64 data; +}; + +struct extent_map { + int nr_extents; + int cur_extent; + struct extent extents[0]; +}; + +#define DMLOOP_MAP_SIZE(x) ((x)*sizeof(struct extent)+sizeof(struct extent_map)) + +/* expect a struct extent_map *map */ +#define DMLOOP_EXTENT(x) ((struct extent *)(&map->extents[(x)])) +#define DMLOOP_EXTENT_TYPE(x) ((x)->type) + +/* dm-loop context */ +struct loop_c { + int flags; + + /* information describing the backing store */ + struct file *filp; + struct block_device *bdev; + char name[BDEVNAME_SIZE + 1]; + struct extent_map *map; + unsigned blkbits; + loff_t offset; + + sector_t sectors; /* size of mapped area in sectors*/ + loff_t size; /* size of entire file in bytes */ + + char *loop_path; +}; + +#ifdef CONFIG_DM_DEBUG +static void dump_extent(struct extent *e) +{ + const char types[] = { 'f', 'd' }; + + if (!e) + return; + + if (e->type != DMLOOP_DEV) { + DMWARN("unknown extent type in map, skipping."); + return; + } + + DMDEBUG("start: %8llu len: %4llu %4c.rstart: %8llu", + e->start, e->len, types[e->type], + (sector_t)e->data ); +} + +static void dump_extent_map(struct extent_map *map) +{ + unsigned i; + + if (!map) + return; + + DMDEBUG("extent map (nr_extents = %d, cur_extent = %d)", + map->nr_extents, map->cur_extent); + + for (i = 0; i < map->nr_extents; i++) + dump_extent(DMLOOP_EXTENT(i)); +} + +#else /* CONFIG_DM_DEBUG */ +#define dump_extent_map(a) +#endif /* DMLOOP_TRACE */ + +static struct extent_map *finalize_map(struct extent_map * map) +{ + struct extent_map *_map; + + if (!map) + goto out; + + _map = kmalloc(DMLOOP_MAP_SIZE(map->nr_extents), GFP_KERNEL); + DMDEBUG("attempted to re-allocate extent map and header to %u bytes", + DMLOOP_MAP_SIZE(map->nr_extents)); + + if (!_map) { + DMERR("Could not re-allocate final extent map"); + kfree(map); + goto out; + } + + memcpy(_map, map, DMLOOP_MAP_SIZE(map->nr_extents)); + kfree(map); + return _map; +out: + return NULL; +} + +#define _ADD_EXTENT(s, l, t) \ +do{ \ + DMLOOP_EXTENT((nr_extents))->start = (s); \ + DMLOOP_EXTENT((nr_extents))->len = (l); \ + DMLOOP_EXTENT((nr_extents))->type = (t); \ + ((nr_extents++)); \ +} while(0); + +#define ADD_DEV_EXTENT(s, l, r) \ +do { \ + map->extents[nr_extents].data = (u64)r; \ + _ADD_EXTENT(s, l, DMLOOP_DEV) \ +} while(0); + +static int setup_loop_extents(struct loop_c *lc) +{ + struct extent_map *map; + struct inode *inode; + unsigned blkbits; + unsigned shiftbits; + sector_t probe_block; + sector_t last_block; + sector_t start = 0; + int nr_extents = 0; + + map = kzalloc(DMLOOP_MAP_SIZE(LOOP_MAX_EXTENTS), GFP_KERNEL); + if (!map) { + DMERR("Could not allocate initial extent map"); + return -ENOMEM; + } + + DMDEBUG("Allocated initial extent map of %u bytes, %d entries.", + DMLOOP_MAP_SIZE(LOOP_MAX_EXTENTS), LOOP_MAX_EXTENTS); + + inode = lc->filp->f_mapping->host; + /* FIXME Check if this is possible */ + if (!inode) + goto out_free; + + if (!inode->i_sb || !inode->i_sb->s_bdev) { + strcpy(lc->name, "none"); + DMERR("Non-block-device-based filesystems are not supported"); + goto out_free; + } + + lc->bdev = inode->i_sb->s_bdev; + bdevname(lc->bdev, &lc->name[0]); + DMDEBUG("setting real device to %s", lc->name); + + blkbits = inode->i_blkbits; + probe_block = lc->offset >> blkbits; + shiftbits = blkbits - SECTOR_SHIFT; + last_block = lc->size >> blkbits; + + DMDEBUG("scanning file blocks %llu-%llu", probe_block, last_block - 1); + DMDEBUG("using: blkbits=%u, probe_block=%llu, " + "sectors_per_block=%u, last_block=%llu", + blkbits, probe_block, 1 << shiftbits, last_block); + +// FIXME Can this be a separate function? + while (probe_block < last_block && nr_extents < LOOP_MAX_EXTENTS) { + sector_t first_block; + sector_t cur_block; + sector_t nr_blocks = 0; + + first_block = bmap(inode, probe_block); + DMDEBUG("new extent starting r/b/o: %llu/%llu/%llu", + first_block, probe_block, probe_block << blkbits); + + if (!first_block) + goto bad_bmap; + + DMDEBUG(" (%d) bmapped first file block %llu to %llu", + nr_extents + 1, probe_block, first_block); + + probe_block++; + + for (cur_block = first_block; probe_block < last_block; probe_block++) { + nr_blocks++; + cur_block = bmap(inode, probe_block); + if (!cur_block) + goto bad_bmap; + if (cur_block != first_block + nr_blocks) { + /* Discontiguity */ + sector_t len = nr_blocks << shiftbits; + DMDEBUG("adding device extent %d (%llu/%llu/%llu)", + nr_extents, start, len, first_block); + ADD_DEV_EXTENT(start, len, (first_block << shiftbits)); + start = (probe_block - (lc->offset >> blkbits)) << shiftbits; + goto reprobe; + } + } + DMDEBUG("adding final device extent %d (%llu/%llu/%llu)", + nr_extents, start, (nr_blocks + 1) << shiftbits, + first_block << shiftbits); + ADD_DEV_EXTENT(start, (nr_blocks + 1) << shiftbits, first_block << shiftbits); +reprobe: + continue; + } + + map->nr_extents = nr_extents; + map->cur_extent = 0; + + DMDEBUG("created initial extent map, finalizing."); + map = finalize_map(map); + DMINFO("Finalized extent map of %u bytes, %d entries.", + (map->nr_extents * sizeof(struct extent)), + map->nr_extents); + + dump_extent_map(map); + lc->blkbits = blkbits; + lc->map = map; + + return 0; + +bad_bmap: + DMERR("Loopfile has holes"); + dump_extent_map(map); +out_free: + kfree(map); + return -EINVAL; +} + +static int contains_sector(struct extent *e, sector_t s) +{ + return ((s < (e->start + (e->len))) && e->start <= s); +} + +/* + * For now this just tries to work. There is lots of scope for improving + * performance later, once the behaviour is better understood. +*/ +static struct extent *find_extent(struct extent_map *map, sector_t s) +{ + unsigned i; + + if (contains_sector(DMLOOP_EXTENT(map->cur_extent), s)) + return DMLOOP_EXTENT(map->cur_extent); + + /* FIXME */ + for(i = 0; i < map->nr_extents; i++) + if (contains_sector(DMLOOP_EXTENT(i), s)) { + map->cur_extent = i; + return DMLOOP_EXTENT(i); + } + + return NULL; +} + +/* bmap debugging support */ +#ifdef CONFIG_DM_DEBUG +#define CACHE_OLD_SECTOR sector_t old_bi_sector = bio->bi_sector +unsigned bmap_debug; +#define BMAP_DEBUG \ +do { \ + /* temporary - x check for split_io */ \ + if (bio_sectors(bio) > (e->start + e->len)) { \ + DMDEBUG("WARNING: bio doesn't fit in extent"); \ + return -EIO; \ + } \ + if (bmap_debug) \ + DMDEBUG("mapping %u logical sectors starting %llu " \ + "to dev extent at real sector %llu", \ + bio_sectors(bio), old_bi_sector, bio->bi_sector); \ +} while(0); +#else +#define CACHE_OLD_SECTOR +#define BMAP_DEBUG +#endif /* CONFIG_DM_DEBUG */ + +/* + * Perform a simple remapping of logical -> physical sector using the extent table. + * +*/ +static int do_remap_dev_bio(struct dm_target *ti, struct bio *bio, struct extent *e) +{ + struct loop_c *lc = (struct loop_c*) ti->private; + + CACHE_OLD_SECTOR; + bio->bi_bdev = lc->bdev; + bio->bi_sector = ((sector_t)e->data + + (bio->bi_sector - (e->start + ti->begin))); + BMAP_DEBUG; + + return 1; +} + +static int loop_map(struct dm_target *ti, struct bio *bio, + union map_info *context) +{ + struct loop_c *lc = ti->private; + struct extent *e; + + if (bio_barrier(bio)) + return -EOPNOTSUPP; + + e = find_extent(lc->map, bio->bi_sector - ti->begin); + if (!e) { + DMERR("Error: sector %llu in device, but no matching " + "extent found.", bio->bi_sector); + goto error; + } + + switch (DMLOOP_EXTENT_TYPE(e)) { + case DMLOOP_DEV: + return do_remap_dev_bio(ti, bio, e); + default: + DMERR("Illegal extent type %d at offset 0x%x\n", + DMLOOP_EXTENT_TYPE(e), (e - lc->map->extents)); + BUG(); + } + +error: + return -EIO; +} + +/* + * This needs some thought on handling unlinked backing files. some parts of + * the kernel return a cached name (now invalid), while others return a dcache + * "/path/to/foo (deleted)" name (never was/is valid). Which is better is + * debatable. + * + * On the one hand, using a cached name gives table output which is directly + * usable assuming the user re-creates the unlinked image file, on the other + * it is more consistent with e.g. swap to use the dcache name. +*/ +static int loop_status(struct dm_target *ti, status_type_t type, + char *result, unsigned maxlen) +{ + struct loop_c *lc = (struct loop_c *) ti->private; + + switch (type) { + case STATUSTYPE_INFO: + break; + + case STATUSTYPE_TABLE: + snprintf(result, maxlen, "%s %llu", lc->loop_path, + lc->offset); + break; + } + return 0; +} + +static int loop_invalidate_file(struct file *filp) +{ + return invalidate_inode_pages(filp->f_mapping); +} + +/* + * This should map start/end to pgoff_t and use + * invalidate_inode_pages_range. + * For now we toss out the whole lot. +static int loop_invalidate_file_range(struct file *filp, + loff_t start, loff_t end) +{ + start = start; end = end; + return loop_invalidate_file(filp); +} +*/ + +static void loop_put_file(struct file *filp) +{ + struct inode *inode; + + if (!filp) + return; + + inode = filp->f_mapping->host; + + mutex_lock(&inode->i_mutex); + inode->i_flags &= ~S_SWAPFILE; + mutex_unlock(&inode->i_mutex); + + filp_close(filp, NULL); +} + +static struct file *loop_get_file(char *loop_path, unsigned *flags) +{ + struct file *filp; + struct inode *inode; + int r; + + filp = filp_open(loop_path, + ((*flags & DMLOOP_READONLY) ? O_RDONLY : O_RDWR) | + O_DIRECT | O_LARGEFILE, 0); + if (IS_ERR(filp)) + return filp; + + inode = filp->f_mapping->host; + if (!S_ISREG(inode->i_mode)) { + DMERR("file is not a regular file: %s", loop_path); + r = -EINVAL; + goto out; + } + + if (mapping_writably_mapped(filp->f_mapping)) { + DMERR("file is mapped into userspace for writing: %s", loop_path); + r = -EBUSY; + goto out; + } + + if (mapping_mapped(filp->f_mapping)) + DMWARN("file is mapped into userspace: %s", loop_path); + + if (IS_SWAPFILE(inode)) { + DMERR("file is already in use: %s", loop_path); + goto out; + } + + /* + * We overload the S_SWAPFILE flag for loop targets because + * it provides the same no-truncate semantics we require, and holding + * onto i_sem is no longer an option. + */ + mutex_lock(&inode->i_mutex); + inode->i_flags |= S_SWAPFILE; + mutex_unlock(&inode->i_mutex); + + return filp; + +out: + fput(filp); + + return ERR_PTR(r); +} + +static int loop_setup_size(struct loop_c *lc, struct dm_target *ti, char **estr) +{ + struct inode *inode = lc->filp->f_mapping->host; + + lc->size = i_size_read(inode); + lc->blkbits = inode->i_blkbits; + + if (lc->offset & (1 << lc->blkbits - 1)) { + DMERR("Backing file offset of %lld bytes not a multiple of " + "filesystem blocksize (%d)", lc->offset, + 1 << lc->blkbits); + *estr = "Loop file offset must be a multiple of fs blocksize"; + goto error; + } + + if (!lc->size) { + *estr = "Backing file is empty"; + goto error; + } + + if (lc->size < to_bytes(1)) { + *estr = "Backing file cannot be less than one sector in size"; + goto error; + } + + lc->sectors = to_sector(inode->i_size); + if (to_bytes(lc->sectors) < lc->size) + DMWARN("Not using %llu bytes in incomplete block at EOF", + lc->size - to_bytes(lc->sectors)); + + if (lc->size - lc->offset < to_bytes(ti->len)) { + *estr = "Mapped region cannot be smaller than target size"; + goto error; + } + + return 0; + +error: + return -EINVAL; +} + +void loop_flush(struct dm_target *ti) +{ + struct loop_c *lc = ti->private; + + loop_invalidate_file(lc->filp); +} + +static void loop_dtr(struct dm_target *ti) +{ + struct loop_c *lc = ti->private; + + if (!(lc->flags & DMLOOP_READONLY)) + loop_invalidate_file(lc->filp); + + loop_put_file(lc->filp); + + DMINFO("Released file %s", lc->loop_path); + + if (lc->map) + kfree(lc->map); + + kfree(lc); +} + +/* + * Construct a loopback mapping: + */ +static int loop_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + struct loop_c *lc; + int r; + + if (argc != 2) { + ti->error = "Invalid argument count"; + DMDEBUG("Invalid argument count"); + return -EINVAL; + } + + lc = kzalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) { + ti->error = "Cannot allocate loop context"; + return -ENOMEM; + } + + r = -ENOMEM; + lc->loop_path = kstrdup(argv[0], GFP_KERNEL); + if (!lc->loop_path) + goto out; + + r = -EINVAL; + if (sscanf(argv[1], "%lld", &lc->offset) != 1) { + ti->error = "Invalid file offset"; + goto out; + } + + if (!(dm_table_get_mode(ti->table) & FMODE_WRITE)) + lc->flags |= DMLOOP_READONLY; + + lc->filp = loop_get_file(lc->loop_path, &lc->flags); + if (IS_ERR(lc->filp)) { + ti->error = "Bad loop backing file"; + r = PTR_ERR(lc->filp); + goto out; + } + + r = loop_setup_size(lc, ti, &ti->error); + if (r) + goto out_putf; + + r = setup_loop_extents(lc); + if (r) { + ti->error = "Could not create extent map"; + goto out_putf; + } + + /* Split I/O at block boundaries */ + ti->split_io = 1 << (lc->blkbits - SECTOR_SHIFT); + DMDEBUG("Splitting io at %llu sector boundaries", ti->split_io); + + if (lc->bdev) + dm_set_device_limits(ti, lc->bdev); + + DMDEBUG("Constructed loop target to %s on real device %s " + "(%lldk, %llu sectors)", lc->loop_path, + lc->name, (lc->size >> 10), lc->sectors); + + ti->private = lc; + + return 0; + +out_putf: + loop_put_file(lc->filp); + +out: + kfree(lc); + return r; +} + +static struct target_type loop_target = { + .name = "loop", + .version = {0, 0, 1}, + .module = THIS_MODULE, + .ctr = loop_ctr, + .dtr = loop_dtr, + .map = loop_map, + .presuspend = loop_flush, + .flush = loop_flush, + .status = loop_status, +}; + +int __init dm_loop_init(void) +{ + int r; + + r = dm_register_target(&loop_target); + + if (r < 0) { + DMERR("Register failed %d", r); + goto out; + } + + r = -ENOMEM; + + DMINFO("Loop target registered"); + return 0; + +out: + return r; +} + +void dm_loop_exit(void) +{ + int r; + + r = dm_unregister_target(&loop_target); + + if (r < 0) + DMERR("Target unregister failed %d", r); +} + +module_init(dm_loop_init); +module_exit(dm_loop_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Bryn Reeves "); +MODULE_DESCRIPTION("device-mapper loop target"); + +#ifdef CONFIG_DM_DEBUG +module_param(bmap_debug, int, 0); +MODULE_PARM_DESC(bmap_debug, "enable bmap debugging output (VERY noisy)."); +#endif /* CONFIG_DM_DEBUG */