dm-multipath: ps-init-fn.patch This adds the ps-init function. From: Mike Christie --- diff/drivers/md/dm-mpath.c 2004-09-21 16:37:20.000000000 +0100 +++ source/drivers/md/dm-mpath.c 2004-09-21 16:37:27.000000000 +0100 @@ -17,6 +17,7 @@ #include #include #include +#include #include /* Path properties */ @@ -27,27 +28,36 @@ struct priority_group *pg; }; +inline struct block_device *dm_path_to_bdev(struct path *path) +{ + return path->dev->bdev; +} + struct priority_group { struct list_head list; struct multipath *m; - struct path_selector *ps; + struct path_selector ps; unsigned nr_paths; struct list_head paths; }; +#define ps_to_pg(__ps) container_of((__ps), struct priority_group, ps) + /* Multipath context */ struct multipath { struct list_head list; struct dm_target *ti; + spinlock_t lock; + unsigned nr_priority_groups; struct list_head priority_groups; + int initializing_pg; + struct completion init_pg_wait; struct priority_group *current_pg; - spinlock_t lock; - struct work_struct dispatch_failed; struct bio_list failed_ios; @@ -94,13 +104,7 @@ if (!pg) return NULL; - pg->ps = kmalloc(sizeof(*pg->ps), GFP_KERNEL); - if (!pg->ps) { - kfree(pg); - return NULL; - } - memset(pg->ps, 0, sizeof(*pg->ps)); - + memset(pg, 0, sizeof(*pg)); INIT_LIST_HEAD(&pg->paths); return pg; @@ -120,14 +124,11 @@ static void free_priority_group(struct priority_group *pg, struct dm_target *ti) { - struct path_selector *ps = pg->ps; + struct path_selector *ps = &pg->ps; - if (ps) { - if (ps->type) { - ps->type->dtr(ps); - dm_put_path_selector(ps->type); - } - kfree(ps); + if (ps->type) { + ps->type->dtr(ps); + dm_put_path_selector(ps->type); } free_paths(&pg->paths, ti); @@ -142,6 +143,8 @@ if (m) { memset(m, 0, sizeof(*m)); INIT_LIST_HEAD(&m->priority_groups); + init_completion(&m->init_pg_wait); + m->initializing_pg = 0; m->lock = SPIN_LOCK_UNLOCKED; INIT_WORK(&m->dispatch_failed, dispatch_failed_ios, m); INIT_WORK(&m->trigger_event, trigger_event, m); @@ -169,45 +172,115 @@ 
kfree(m); } -/*----------------------------------------------------------------- - * The multipath daemon is responsible for resubmitting failed ios. - *---------------------------------------------------------------*/ -static struct path *select_path(struct multipath *m, struct bio *bio, - union map_info *info) +static void __ps_init_complete(struct multipath *m, + struct priority_group *pg) +{ + m->initializing_pg = 0; + m->current_pg = pg; + complete_all(&m->init_pg_wait); + schedule_work(&m->dispatch_failed); +} + +void dm_ps_init_complete(struct path_selector *ps) { - struct priority_group *pg; - struct path *path = NULL; unsigned long flags; + struct priority_group *pg = ps_to_pg(ps); + struct multipath *m = pg->m; spin_lock_irqsave(&m->lock, flags); + __ps_init_complete(m, pg); + spin_unlock_irqrestore(&m->lock, flags); +} - pg = m->current_pg; - if (pg && (path = pg->ps->type->select_path(pg->ps, bio, info))) - goto done; +EXPORT_SYMBOL(dm_ps_init_complete); + +static int select_group(struct multipath *m, struct mpath_io *mpio, + struct bio *bio) +{ + struct priority_group *pg = NULL; + int err; + + m->current_pg = NULL; + m->initializing_pg = 1; + init_completion(&m->init_pg_wait); - /* - * loop through the priority groups until we - * find a valid path. - */ list_for_each_entry (pg, &m->priority_groups, list) { - path = pg->ps->type->select_path(pg->ps, bio, info); - if (path) { - m->current_pg = pg; - break; + + if (pg->ps.type->init) { + spin_unlock_irq(&m->lock); + err = pg->ps.type->init(&pg->ps); + spin_lock_irq(&m->lock); + + if (err == DM_PS_INITIALIZING) + return DM_PS_INITIALIZING; + else if (err == DM_PS_FAILED) + continue; } + + mpio->path = pg->ps.type->select_path(&pg->ps, bio, + &mpio->info); + if (mpio->path) + break; } - done: - spin_unlock_irqrestore(&m->lock, flags); + __ps_init_complete(m, mpio->path ? pg : NULL); + return mpio->path ? 
DM_PS_SUCCESS : DM_PS_FAILED; +} + +static int select_path1(struct multipath *m, struct mpath_io *mpio, + struct bio *bio, int wait) +{ + mpio->path = NULL; - return path; + retest: + /* + * completion event, current_pg, initializing_pg and + * in the case of wait=0 adding to the failed_ios list for + * resubmission are protected under the m->lock to avoid races. + */ + if (unlikely(m->initializing_pg)) { + if (!wait) + return -EWOULDBLOCK; + + spin_unlock_irq(&m->lock); + wait_for_completion(&m->init_pg_wait); + spin_lock_irq(&m->lock); + goto retest; + } + + if (m->current_pg) { + struct path_selector *ps = &m->current_pg->ps; + + mpio->path = ps->type->select_path(ps, bio, &mpio->info); + if (!mpio->path && + (select_group(m, mpio, bio) == DM_PS_INITIALIZING)) + /* + * while the lock was dropped the + * initialization might have completed. + */ + goto retest; + } + + return mpio->path ? 0 : -EIO; } -static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio) +static int map_io(struct multipath *m, struct mpath_io *mpio, + struct bio *bio, int wait) { - mpio->path = select_path(m, bio, &mpio->info); - if (unlikely(!mpio->path)) - return -EIO; + int err; + + spin_lock_irq(&m->lock); + err = select_path1(m, mpio, bio, wait); + if (err == -EWOULDBLOCK) + /* + * when the ps init is completed it will + * remap and submit this bio + */ + bio_list_add(&m->failed_ios, bio); + spin_unlock_irq(&m->lock); + + if (err) + return err; bio->bi_bdev = mpio->path->dev->bdev; return 0; @@ -225,9 +298,29 @@ spin_unlock_irqrestore(&m->lock, flags); while (bio) { + int err; + struct mpath_io *mpio; + union map_info *info; + next = bio->bi_next; bio->bi_next = NULL; - generic_make_request(bio); + + info = dm_get_mapinfo(bio); + mpio = info->ptr; + + /* + * For -EWOULDBLOCK the bio could not be mapped + * due to a ps initialization. The bio has been + * requeued, and the work will be processed when + * the initialization is completed. 
+ */ + err = map_io(m, mpio, bio, 0); + if (!err) + generic_make_request(bio); + else if (err != -EWOULDBLOCK) + /* no paths left */ + bio_endio(bio, bio->bi_size, -EIO); + bio = next; } } @@ -359,13 +452,12 @@ goto bad; } - r = pst->ctr(pg->ps); + r = pst->ctr(&pg->ps); if (r) { - /* FIXME: need to put the pst ? fix after - * factoring out the register */ + dm_put_path_selector(pst); goto bad; } - pg->ps->type = pst; + pg->ps.type = pst; /* * read the paths @@ -389,7 +481,7 @@ path_args.argc = nr_params; path_args.argv = as->argv; - path = parse_path(&path_args, pg->ps, ti); + path = parse_path(&path_args, &pg->ps, ti); if (!path) goto bad; @@ -461,42 +553,45 @@ union map_info *info) { int r; - struct mpath_io *io; + struct mpath_io *mpio; struct multipath *m = (struct multipath *) ti->private; - io = mempool_alloc(m->mpio_pool, GFP_NOIO); - dm_bio_record(&io->details, bio); + mpio = mempool_alloc(m->mpio_pool, GFP_NOIO); + dm_bio_record(&mpio->details, bio); bio->bi_rw |= (1 << BIO_RW_FAILFAST); - r = map_io(m, bio, io); + r = map_io(m, mpio, bio, 1); if (unlikely(r)) { - mempool_free(io, m->mpio_pool); + mempool_free(mpio, m->mpio_pool); return r; } - info->ptr = io; + info->ptr = mpio; return 1; } static int do_end_io(struct multipath *m, struct bio *bio, - int error, struct mpath_io *io) + int error, struct mpath_io *mpio) { - struct path_selector *ps = io->path->pg->ps; + struct path_selector *ps = &mpio->path->pg->ps; - error = ps->type->end_io(ps, bio, error, &io->info); + ps->type->end_io(ps, bio, error, &mpio->info); if (error) { - /* remap */ - dm_bio_restore(&io->details, bio); - if (map_io(m, bio, io)) - /* no paths left */ - return -EIO; - /* queue for the daemon to resubmit */ + dm_bio_restore(&mpio->details, bio); + + /* queue for the daemon to resubmit or fail */ spin_lock(&m->lock); bio_list_add(&m->failed_ios, bio); + /* + * If a ps is initializing we do not queue the work + * because when the ps initialization has completed + * it will queue the
dispatch function to be run. + */ + if (!m->initializing_pg) + schedule_work(&m->dispatch_failed); spin_unlock(&m->lock); - schedule_work(&m->dispatch_failed); return 1; /* io not complete */ } @@ -510,7 +605,18 @@ struct mpath_io *io = (struct mpath_io *) info->ptr; int r; - r = do_end_io(m, bio, error, io); + /* + * If we report to dm that we are going to retry the + * bio, but that fails due to a pst->init failure + * calling bio_endio from dm-mpath.c will end up + * calling dm-mpath's endio fn, so this test catches + * that case. + */ + if (io->path) + r = do_end_io(m, bio, error, io); + else + r = -EIO; + if (r <= 0) mempool_free(io, m->mpio_pool); @@ -538,12 +644,12 @@ DMEMIT("%u ", m->nr_priority_groups); list_for_each_entry(pg, &m->priority_groups, list) { - DMEMIT("%u %u ", pg->nr_paths, pg->ps->type->info_args); + DMEMIT("%u %u ", pg->nr_paths, pg->ps.type->info_args); list_for_each_entry(p, &pg->paths, list) { format_dev_t(buffer, p->dev->bdev->bd_dev); DMEMIT("%s ", buffer); - sz += pg->ps->type->status(pg->ps, p, type, + sz += pg->ps.type->status(&pg->ps, p, type, result + sz, maxlen - sz); } } @@ -553,13 +659,13 @@ DMEMIT("%u ", m->nr_priority_groups); list_for_each_entry(pg, &m->priority_groups, list) { - DMEMIT("%s %u %u ", pg->ps->type->name, - pg->nr_paths, pg->ps->type->table_args); + DMEMIT("%s %u %u ", pg->ps.type->name, + pg->nr_paths, pg->ps.type->table_args); list_for_each_entry(p, &pg->paths, list) { format_dev_t(buffer, p->dev->bdev->bd_dev); DMEMIT("%s ", buffer); - sz += pg->ps->type->status(pg->ps, p, type, + sz += pg->ps.type->status(&pg->ps, p, type, result + sz, maxlen - sz); } --- diff/drivers/md/dm-path-selector.c 2004-09-21 16:37:20.000000000 +0100 +++ source/drivers/md/dm-path-selector.c 2004-09-21 16:37:27.000000000 +0100 @@ -254,15 +254,15 @@ return 0; } -static int rr_end_io(struct path_selector *ps, struct bio *bio, int error, - union map_info *info) +static void rr_end_io(struct path_selector *ps, struct bio *bio, int error, 
+ union map_info *info) { unsigned long flags; struct selector *s = (struct selector *) ps->context; struct path_info *pi = (struct path_info *)info->ptr; if (likely(!error)) - return 0; + return; spin_lock_irqsave(&s->lock, flags); @@ -274,8 +274,6 @@ } spin_unlock_irqrestore(&s->lock, flags); - - return -EIO; } /* Path selector */ --- diff/drivers/md/dm-path-selector.h 2004-09-21 16:37:20.000000000 +0100 +++ source/drivers/md/dm-path-selector.h 2004-09-21 16:37:27.000000000 +0100 @@ -16,6 +16,8 @@ struct path; +struct block_device *dm_path_to_bdev(struct path *path); + /* * We provide an abstraction for the code that chooses which path * to send some io down. @@ -33,6 +35,22 @@ typedef void (*ps_dtr_fn) (struct path_selector *ps); /* + * Allows the ps to initialize itself. It should return one + * of the following return values. If DM_PS_INITIALIZING is + * returned the path-selector must call dm_ps_init_complete + * when the initialization has completed. + */ +enum { + DM_PS_SUCCESS, + DM_PS_FAILED, + DM_PS_INITIALIZING, +}; + +void dm_ps_init_complete(struct path_selector *ps); + +typedef int (*ps_init_fn) (struct path_selector *ps); + +/* * Add an opaque path object, along with some selector specific * path args (eg, path priority).
*/ @@ -52,8 +70,8 @@ typedef struct path *(*ps_select_path_fn) (struct path_selector *ps, struct bio *bio, union map_info *info); -typedef int (*ps_end_io) (struct path_selector *ps, struct bio *bio, - int error, union map_info *info); +typedef void (*ps_end_io) (struct path_selector *ps, struct bio *bio, + int error, union map_info *info); /* * Table content based on parameters added in ps_add_path_fn @@ -73,6 +91,7 @@ unsigned int info_args; ps_ctr_fn ctr; ps_dtr_fn dtr; + ps_init_fn init; ps_add_path_fn add_path; ps_select_path_fn select_path; --- diff/drivers/md/dm.c 2004-09-21 16:34:04.000000000 +0100 +++ source/drivers/md/dm.c 2004-09-21 16:37:27.000000000 +0100 @@ -1127,6 +1127,15 @@ return test_bit(DMF_SUSPENDED, &md->flags); } +inline union map_info *dm_get_mapinfo(struct bio *bio) +{ + if (bio && bio->bi_private) + return &((struct target_io *)bio->bi_private)->info; + return NULL; +} + +EXPORT_SYMBOL(dm_get_mapinfo); + static struct block_device_operations dm_blk_dops = { .open = dm_blk_open, .release = dm_blk_close, --- diff/include/linux/device-mapper.h 2004-05-19 22:12:59.000000000 +0100 +++ source/include/linux/device-mapper.h 2004-09-21 16:37:27.000000000 +0100 @@ -18,6 +18,8 @@ unsigned long long ll; }; +inline union map_info *dm_get_mapinfo(struct bio *bio); + /* * In the constructor the target parameter will already have the * table, type, begin and len fields filled in.