Multipath: makes priority group / path-selector initialization possible. Remapping failed bios from end_io was awkward because you couldn't choose a path until you knew a group had been activated and which paths in that group were still functional. On top of that, incoming IO needed to be mapped while failures were still coming in. The choices were to make map_io sleep or to do some internal queueing; I chose the former, so I had to move the remapping map_io call into dispatch_failed_ios and add a get_mapinfo accessor function. Queueing incoming io until the failed io has been resubmitted may be better, but targets calling dm's queueing mechanism didn't seem right, and neither did reimplementing it, so I do not know which approach is better. While paths are failing, performance is going to take a hit anyway, so io ordering should only be a problem if you are concerned with journaling or barrier type operations (and since we end up sending IO down multiple queues we cannot guarantee ordering on the normal IO code path in any case).

[Mike Christie]

--- diff/drivers/md/dm-mpath.c	2004-04-06 15:54:32.713533608 +0100
+++ source/drivers/md/dm-mpath.c	2004-04-06 15:54:55.019142640 +0100
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <linux/completion.h>
 #include
 
 /*
@@ -61,6 +62,10 @@ struct multipath {
 	spinlock_t lock;
 
+	int initializing_pg;
+	struct completion init_pg_wait;
+	struct priority_group *current_pg;
+
 	struct path *current_path;
 	unsigned current_count;
 
@@ -158,6 +163,8 @@ static struct multipath *alloc_multipath
 	if (m) {
 		memset(m, 0, sizeof(*m));
 		INIT_LIST_HEAD(&m->priority_groups);
+		init_completion(&m->init_pg_wait);
+		m->initializing_pg = 0;
 		m->lock = SPIN_LOCK_UNLOCKED;
 		INIT_WORK(&m->dispatch_failed, dispatch_failed_ios, m);
 		INIT_WORK(&m->trigger_event, trigger_event, m);
@@ -185,41 +192,75 @@ static void free_multipath(struct multip
 	kfree(m);
 }
 
-/*-----------------------------------------------------------------
- * The multipath daemon is responsible for resubmitting failed ios.
- *---------------------------------------------------------------*/
-static int __choose_path(struct multipath *m)
+static struct path *__choose_path(struct multipath *m)
 {
-	struct priority_group *pg;
+	struct priority_group *pg, *orig_pg = m->current_pg;
 	struct path *path = NULL;
 
-	/* loop through the priority groups until we find a valid path. */
+	m->current_pg = NULL;
+
+	init_completion(&m->init_pg_wait);
+	m->initializing_pg = 1;
+
 	list_for_each_entry (pg, &m->priority_groups, list) {
+		if (orig_pg == pg)
+			continue;
+
+		if (pg->ps->type->init) {
+			int err;
+
+			spin_unlock_irq(&m->lock);
+			err = pg->ps->type->init(pg->ps);
+			spin_lock_irq(&m->lock);
+			if (err)
+				continue;
+		}
+
 		path = pg->ps->type->select_path(pg->ps);
-		if (path)
+		if (path) {
+			m->current_pg = pg;
 			break;
+		}
 	}
 
-	m->current_path = path;
-	m->current_count = MPATH_MIN_IO;
+	m->initializing_pg = 0;
+	complete_all(&m->init_pg_wait);
 
-	return 0;
+	return path;
 }
-
+
 static struct path *get_current_path(struct multipath *m)
 {
-	struct path *path;
-	unsigned long flags;
+	struct path *path = NULL;
 
-	spin_lock_irqsave(&m->lock, flags);
+ retry:
+	spin_lock_irq(&m->lock);
 
-	/* Do we need to select a new path? */
-	if (!m->current_path || --m->current_count == 0)
-		__choose_path(m);
+	if (m->current_path && --m->current_count > 0)
+		goto done;
 
+	/*
+	 * completion event, current_pg, and initializing_pg
+	 * are protected under the m->lock to avoid races.
+	 */
+	if (unlikely(m->initializing_pg)) {
+		spin_unlock_irq(&m->lock);
+		wait_for_completion(&m->init_pg_wait);
+		goto retry;
+	}
+
+	if (m->current_pg) {
+		path = m->current_pg->ps->type->select_path(m->current_pg->ps);
+		if (!path)
+			path = __choose_path(m);
+	}
+
+	m->current_count = MPATH_MIN_IO;
+	m->current_path = path;
+
+ done:
 	path = m->current_path;
-	spin_unlock_irqrestore(&m->lock, flags);
+	spin_unlock_irq(&m->lock);
 
 	return path;
 }
@@ -234,10 +275,12 @@ static int map_io(struct multipath *m, s
 	return 0;
 }
 
+/*-----------------------------------------------------------------
+ * The multipath daemon is responsible for resubmitting failed ios.
+ *---------------------------------------------------------------*/
 static void dispatch_failed_ios(void *data)
 {
 	struct multipath *m = (struct multipath *) data;
-	unsigned long flags;
 
 	struct bio *bio = NULL, *next;
 
@@ -246,9 +289,20 @@ static void dispatch_failed_ios(void *da
 	spin_unlock_irqrestore(&m->lock, flags);
 
 	while (bio) {
+		struct mpath_io *io;
+		union map_info *info;
+
 		next = bio->bi_next;
 		bio->bi_next = NULL;
-		generic_make_request(bio);
+
+		info = dm_get_mapinfo(bio);
+		io = info->ptr;
+
+		if (map_io(m, bio, &io->path))
+			/* no paths left */
+			bio_endio(bio, bio->bi_size, -EIO);
+		else
+			generic_make_request(bio);
 		bio = next;
 	}
 }
@@ -461,6 +515,8 @@ static int multipath_ctr(struct dm_targe
 		list_add_tail(&pg->list, &m->priority_groups);
 	}
 
+	m->current_pg = list_entry(m->priority_groups.next,
+				   struct priority_group, list);
 	ti->private = m;
 	m->ti = ti;
 
@@ -516,17 +572,13 @@ static void update_path(struct path *pat
 static int do_end_io(struct multipath *m, struct bio *bio,
 		     int error, struct mpath_io *io)
 {
-	int r;
-
 	if (error) {
+		if (!io->path)
+			return -EIO;
+
 		update_path(io->path, error);
 
-		/* remap */
 		dm_bio_restore(&io->details, bio);
-		r = map_io(m, bio, &io->path);
-		if (r)
-			/* no paths left */
-			return -EIO;
 
 		/* queue for the daemon to resubmit */
 		spin_lock(&m->lock);
--- diff/drivers/md/dm-path-selector.h	2004-04-06 15:54:32.714533456 +0100
+++ source/drivers/md/dm-path-selector.h	2004-04-06 15:54:55.019142640 +0100
@@ -33,6 +33,12 @@ typedef int (*ps_ctr_fn) (struct path_se
 typedef void (*ps_dtr_fn) (struct path_selector *ps);
 
 /*
+ * Allows the ps to initialize itself. This fn may sleep.
+ * The multipath context lock is not held.
+ */
+typedef int (*ps_init_fn) (struct path_selector *ps);
+
+/*
  * Add an opaque path object, along with some selector specific
  * path args (eg, path priority).
  */
@@ -73,6 +79,7 @@ struct path_selector_type {
 	unsigned int info_args;
 	ps_ctr_fn ctr;
 	ps_dtr_fn dtr;
+	ps_init_fn init;
 	ps_add_path_fn add_path;
 	ps_update_path_fn update_path;
--- diff/drivers/md/dm.c	2004-04-06 15:54:26.962407912 +0100
+++ source/drivers/md/dm.c	2004-04-06 15:54:55.020142488 +0100
@@ -1106,6 +1106,15 @@ int dm_suspended(struct mapped_device *m
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+inline union map_info *dm_get_mapinfo(struct bio *bio)
+{
+	if (bio && bio->bi_private)
+		return &((struct target_io *)bio->bi_private)->info;
+	return NULL;
+}
+
+EXPORT_SYMBOL(dm_get_mapinfo);
+
 struct block_device_operations dm_blk_dops = {
 	.open = dm_blk_open,
 	.release = dm_blk_close,
--- diff/include/linux/device-mapper.h	2004-04-05 12:57:08.000000000 +0100
+++ source/include/linux/device-mapper.h	2004-04-06 15:54:55.020142488 +0100
@@ -18,6 +18,8 @@ union map_info {
 	unsigned long long ll;
 };
 
+inline union map_info *dm_get_mapinfo(struct bio *bio);
+
 /*
  * In the constructor the target parameter will already have the
  * table, type, begin and len fields filled in.
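
For illustration only, not part of the patch: a path selector that needs to activate a priority group before handing out paths could hook the new callback roughly as follows. The example_ps_init name is made up; only the ps_init_fn signature, the .init member of struct path_selector_type, and the rule that the hook may sleep with the multipath lock dropped come from the patch.

static int example_ps_init(struct path_selector *ps)
{
	/*
	 * __choose_path() drops m->lock around this call, so the
	 * selector may sleep here, e.g. while sending an activation
	 * command to the paths in this priority group and waiting
	 * for it to complete.
	 *
	 * Returning non-zero makes __choose_path() skip this
	 * priority group and try the next one.
	 */
	return 0;
}

The selector would then set .init = example_ps_init in its struct path_selector_type next to its existing ctr/dtr/add_path/select_path hooks; selectors that need no group initialization can leave .init NULL, since __choose_path() only calls init when it is non-NULL.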