A huge patch, this started out as Kevin Corrys patch to merge the valid/invalid path lists. But I kept seeing things I wanted to change. Use with care. --- diff/drivers/md/dm-mpath.c 2004-02-02 16:19:16.000000000 +0000 +++ source/drivers/md/dm-mpath.c 2004-02-03 11:17:56.000000000 +0000 @@ -26,11 +26,13 @@ struct list_head list; struct dm_dev *dev; + struct priority_group *pg; + spinlock_t failed_lock; int has_failed; - atomic_t fail_count; - atomic_t fail_total; + unsigned fail_count; + unsigned fail_total; struct semaphore test_lock; struct bio *test_bio; @@ -43,8 +45,9 @@ unsigned priority; struct multipath *m; struct path_selector *ps; - struct list_head valid_paths; - struct list_head invalid_paths; + + unsigned nr_paths; + struct list_head paths; }; /* Multipath context */ @@ -52,13 +55,14 @@ struct list_head list; struct dm_target *ti; - spinlock_t path_lock; + unsigned nr_priority_groups; struct list_head priority_groups; + + spinlock_t lock; struct path *current_path; - atomic_t count; + unsigned current_count; unsigned min_io; - spinlock_t failed_lock; struct bio_list failed_ios; struct bio_list test_ios; @@ -72,8 +76,8 @@ if (path) { memset(path, 0, sizeof(*path)); - atomic_set(&path->fail_count, MPATH_FAIL_COUNT); - atomic_set(&path->fail_total, 0); + path->failed_lock = SPIN_LOCK_UNLOCKED; + path->fail_count = MPATH_FAIL_COUNT; init_MUTEX_LOCKED(&path->test_lock); /* resume will unlock */ path->test_bio = bio_alloc(GFP_KERNEL, 1); @@ -116,8 +120,7 @@ } memset(pg->ps, 0, sizeof(*pg->ps)); - INIT_LIST_HEAD(&pg->valid_paths); - INIT_LIST_HEAD(&pg->invalid_paths); + INIT_LIST_HEAD(&pg->paths); return pg; } @@ -146,8 +149,7 @@ kfree(ps); } - free_paths(&pg->valid_paths, ti); - free_paths(&pg->invalid_paths, ti); + free_paths(&pg->paths, ti); kfree(pg); } @@ -158,9 +160,8 @@ m = kmalloc(sizeof(*m), GFP_KERNEL); if (m) { memset(m, 0, sizeof(*m)); - m->path_lock = SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&m->priority_groups); - m->failed_lock = SPIN_LOCK_UNLOCKED; + m->lock = SPIN_LOCK_UNLOCKED; m->min_io = MPATH_MIN_IO; } @@ -182,50 +183,54 @@ /*----------------------------------------------------------------- * All paths should be tested periodically. *---------------------------------------------------------------*/ -static void __fail_path(struct path *path) +static void fail_path(struct path *path) { - if (path->has_failed) - return; + unsigned long flags; - /* FIXME: this is brain dead */ - if (!atomic_dec_and_test(&path->fail_count)) - return; + spin_lock_irqsave(&path->failed_lock, flags); - path->has_failed = 1; -// path->fail_time = jiffies; - atomic_inc(&path->fail_total); - list_move(&path->list, &path->pg->invalid_paths); - path->pg->ps->type->set_path_state(path->pg->ps, path, 0); - path->pg->m->trigger_event = 1; + if (!path->has_failed) { + /* FIXME: this is brain dead */ + if (!--path->fail_count) { + path->has_failed = 1; + path->fail_total++; + path->pg->ps->type->set_path_state(path->pg->ps, + path, 0); + } + + path->pg->m->trigger_event = 1; + } + + spin_unlock_irqrestore(&path->failed_lock, flags); } -static void __recover_path(struct path *path) +static void recover_path(struct path *path) { - if (!path->has_failed) - return; + unsigned long flags; + + spin_lock_irqsave(&path->failed_lock, flags); - atomic_set(&path->fail_count, MPATH_FAIL_COUNT); - list_move(&path->list, &path->pg->valid_paths); - path->pg->ps->type->set_path_state(path->pg->ps, path, 1); - path->has_failed = 0; - path->pg->m->trigger_event = 1; + if (path->has_failed) { + path->has_failed = 0; + path->fail_count = MPATH_FAIL_COUNT; + path->pg->ps->type->set_path_state(path->pg->ps, path, 1); + path->pg->m->trigger_event = 1; + } + + spin_unlock_irqrestore(&path->failed_lock, flags); } static int test_endio(struct bio *bio, unsigned int done, int error) { struct path *path = (struct path *) bio->bi_private; - struct multipath *m = path->pg->m; - unsigned long flags; if (bio->bi_size) return 1; - spin_lock_irqsave(&m->path_lock, flags); if (error) - __fail_path(path); + fail_path(path); else - __recover_path(path); - spin_unlock_irqrestore(&m->path_lock, flags); + recover_path(path); up(&path->test_lock); return 0; @@ -233,8 +238,12 @@ static void test_path(struct path *p) { + /* + * If we get this lock we're allowed to issue a test io + * for this path. + */ if (down_trylock(&p->test_lock)) - return; /* last test io still pending */ + return; p->test_bio->bi_sector = 0; p->test_bio->bi_bdev = p->dev->bdev; @@ -255,6 +264,7 @@ static void submit_ios(struct bio *bio) { struct bio *next; + while (bio) { next = bio->bi_next; bio->bi_next = NULL; @@ -268,9 +278,9 @@ unsigned long flags; struct bio *bio; - spin_lock_irqsave(&m->failed_lock, flags); + spin_lock_irqsave(&m->lock, flags); bio = bio_list_get(&m->failed_ios); - spin_unlock_irqrestore(&m->failed_lock, flags); + spin_unlock_irqrestore(&m->lock, flags); submit_ios(bio); } @@ -279,20 +289,17 @@ { struct priority_group *pg; struct path *p; - unsigned long flags; - spin_lock_irqsave(&m->path_lock, flags); list_for_each_entry (pg, &m->priority_groups, list) { - list_for_each_entry (p, &pg->valid_paths, list) - fn(p); - - list_for_each_entry (p, &pg->invalid_paths, list) + list_for_each_entry (p, &pg->paths, list) fn(p); } - spin_unlock_irqrestore(&m->path_lock, flags); } -/* Multipathd does this every time it runs, returns a sleep duration hint */ +/* + * Multipathd does this every time it runs, returning a sleep + * duration hint. + */ static jiffy_t do_work(void) { unsigned long flags; @@ -303,14 +310,13 @@ list_for_each_entry (m, &_mpaths, list) { dispatch_failed_ios(m); iterate_paths(m, test_path); - submit_ios(bio_list_get(&m->test_ios)); - spin_lock_irqsave(&m->path_lock, flags); + spin_lock_irqsave(&m->lock, flags); if (m->trigger_event) { dm_table_event(m->ti->table); m->trigger_event = 0; } - spin_unlock_irqrestore(&m->path_lock, flags); + spin_unlock_irqrestore(&m->lock, flags); interval = min_not_zero(interval, m->test_interval); } @@ -422,9 +428,6 @@ return NULL; } -/* - * Returns the number of arguments consumed, or an error. - */ static struct priority_group *parse_priority_group(struct arg_set *as, struct multipath *m, struct dm_target *ti) @@ -436,7 +439,7 @@ }; int r; - unsigned i, nr_paths, nr_selector_args, nr_params; + unsigned i, nr_selector_args, nr_params; struct priority_group *pg; struct path_selector_type *pst; @@ -470,11 +473,10 @@ } pg->ps->type = pst; - /* * read the paths */ - r = read_param(_params + 1, shift(as), &nr_paths, &ti->error); + r = read_param(_params + 1, shift(as), &pg->nr_paths, &ti->error); if (r) goto bad; @@ -483,7 +485,7 @@ goto bad; nr_params = 1 + nr_selector_args; - for (i = 0; i < nr_paths; i++) { + for (i = 0; i < pg->nr_paths; i++) { struct path *path; struct arg_set path_args; @@ -498,7 +500,7 @@ goto bad; path->pg = pg; - list_add_tail(&path->list, &pg->valid_paths); + list_add_tail(&path->list, &pg->paths); consume(as, nr_params); } @@ -548,7 +550,7 @@ {1, 1024, ESTR("invalid number of priority groups")}, }; - int r, pg_count; + int r; struct multipath *m; struct arg_set as; @@ -565,7 +567,7 @@ if (r) goto bad; - r = read_param(_params + 1, shift(&as), &pg_count, &ti->error); + r = read_param(_params + 1, shift(&as), &m->nr_priority_groups, &ti->error); if (r) goto bad; @@ -595,7 +597,6 @@ { struct multipath *m = (struct multipath *) ti->private; -// wait_for_scrub_ios(m); spin_lock(&_mpath_lock); list_del(&m->list); spin_unlock(&_mpath_lock); @@ -603,113 +604,83 @@ free_multipath(m); } -static struct priority_group *__find_priority_group(struct list_head *pgs) +static int __choose_path(struct multipath *m) { struct priority_group *pg; + struct path *path = NULL; - list_for_each_entry (pg, pgs, list) { - if (!list_empty(&pg->valid_paths)) - return pg; + /* loop through the priority groups until we find a valid path. */ + list_for_each_entry (pg, &m->priority_groups, list) { + path = pg->ps->type->select_path(pg->ps); + if (path) + break; } - return NULL; + m->current_path = path; + m->current_count = m->min_io; + return 0; } -static int __choose_path(struct multipath *m) +static struct path *get_current_path(struct multipath *m) { - struct priority_group *pg; struct path *path; + unsigned long flags; - /* get the priority group */ - pg = __find_priority_group(&m->priority_groups); - if (!pg) - return -EIO; + spin_lock_irqsave(&m->lock, flags); + + /* Do we need to select a new path? */ + if (!m->current_path || --m->current_count == 0) + __choose_path(m); + + path = m->current_path; - /* select a path */ - path = pg->ps->type->select_path(pg->ps); + spin_unlock_irqrestore(&m->lock, flags); + + return path; +} + +static int map_io(struct multipath *m, struct bio *bio) +{ + struct path *path; + + path = get_current_path(m); if (!path) - return -EIO; /* No valid path found */ + return -EIO; - m->current_path = path; - atomic_set(&m->count, m->min_io); + bio->bi_bdev = path->dev->bdev; return 0; } static int multipath_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { + int r; struct multipath *m = (struct multipath *) ti->private; - struct path *path; - unsigned long flags; - - spin_lock_irqsave(&m->path_lock, flags); - /* - * Do we need to choose a new path? - */ - if (!m->current_path || atomic_dec_and_test(&m->count)) { - if (__choose_path(m)) { - /* no paths */ - spin_unlock_irqrestore(&m->path_lock, flags); - return -EIO; - } - } - - path = m->current_path; - spin_unlock_irqrestore(&m->path_lock, flags); - - /* map */ bio->bi_rw |= (1 << BIO_RW_FAILFAST); - bio->bi_bdev = path->dev->bdev; + r = map_io(m, bio); + if (r) + return r; + return 1; } /* * Only called on the error path. */ -static struct path *__lookup_path(struct list_head *head, - struct block_device *bdev) -{ - struct path *p; - - list_for_each_entry (p, head, list) - if (p->dev->bdev == bdev) - return p; - - return NULL; -} - -static struct path *__find_path(struct multipath *m, struct block_device *bdev) +static struct path *find_path(struct multipath *m, struct block_device *bdev) { struct path *p; struct priority_group *pg; - list_for_each_entry (pg, &m->priority_groups, list) { - p = __lookup_path(&pg->valid_paths, bdev); - if (p) - return p; - - p = __lookup_path(&pg->invalid_paths, bdev); - if (p) - return p; - } + list_for_each_entry (pg, &m->priority_groups, list) + list_for_each_entry (p, &pg->paths, list) + if (p->dev->bdev == bdev) + return p; return NULL; } -static int __remap_io(struct multipath *m, struct bio *bio) -{ - int r; - - r = __choose_path(m); - if (r) - return r; - - /* remap */ - bio->bi_bdev = m->current_path->dev->bdev; - return 0; -} - static int multipath_end_io(struct dm_target *ti, struct bio *bio, int error, union map_info *map_context) { @@ -720,17 +691,15 @@ if (error) { struct path *path; - spin_lock_irqsave(&m->path_lock, flags); - path = __find_path(m, bio->bi_bdev); - __fail_path(path); - r = __remap_io(m, bio); - spin_unlock_irqrestore(&m->path_lock, flags); + path = find_path(m, bio->bi_bdev); + fail_path(path); + r = map_io(m, bio); if (!r) { /* queue for the daemon to resubmit */ - spin_lock_irqsave(&m->failed_lock, flags); + spin_lock_irqsave(&m->lock, flags); bio_list_add(&m->failed_ios, bio); - spin_unlock_irqrestore(&m->failed_lock, flags); + spin_unlock_irqrestore(&m->lock, flags); dm_daemon_wake(&_kmpathd); r = 1; /* io not complete */ @@ -755,23 +724,6 @@ iterate_paths(m, unlock_path); } -static inline int __pg_count(struct multipath *m) -{ - int count = 0; - struct priority_group *pg; - list_for_each_entry(pg, &m->priority_groups, list) count++; - return count; -} - -static inline int __path_count(struct priority_group *pg) -{ - int count = 0; - struct path *p; - list_for_each_entry(p, &pg->valid_paths, list) count++; - list_for_each_entry(p, &pg->invalid_paths, list) count++; - return count; -} - /* * Info string has the following format: * num_groups [num_paths num_selector_args [path_dev A|F fail_count fail_total [selector_args]* ]+ ]+ @@ -789,38 +741,25 @@ struct path *p; char buffer[32]; - spin_lock_irqsave(&m->path_lock, flags); - switch (type) { case STATUSTYPE_INFO: - sz += snprintf(result + sz, maxlen - sz, "%u ", __pg_count(m)); + sz += snprintf(result + sz, maxlen - sz, "%u ", m->nr_priority_groups); list_for_each_entry(pg, &m->priority_groups, list) { sz += snprintf(result + sz, maxlen - sz, "%u %u ", - __path_count(pg), + pg->nr_paths, pg->ps->type->info_args); - list_for_each_entry(p, &pg->valid_paths, list) { + list_for_each_entry(p, &pg->paths, list) { format_dev_t(buffer, p->dev->bdev->bd_dev); + spin_lock_irqsave(&p->failed_lock, flags); sz += snprintf(result + sz, maxlen - sz, - "%s A %u %u ", buffer, - atomic_read(&p->fail_count), - atomic_read(&p->fail_total)); - pg->ps->type->status(pg->ps, p, type, - result + sz, maxlen - sz); - - sz = strlen(result); - if (sz >= maxlen) - break; - } - list_for_each_entry(p, &pg->invalid_paths, list) { - format_dev_t(buffer, p->dev->bdev->bd_dev); - sz += snprintf(result + sz, maxlen - sz, - "%s F %u %u ", buffer, - atomic_read(&p->fail_count), - atomic_read(&p->fail_total)); + "%s %s %u %u ", buffer, + p->has_failed ? "F" : "A", + p->fail_count, p->fail_total); pg->ps->type->status(pg->ps, p, type, result + sz, maxlen - sz); + spin_unlock_irqrestore(&p->failed_lock, flags); sz = strlen(result); if (sz >= maxlen) @@ -832,25 +771,14 @@ case STATUSTYPE_TABLE: sz += snprintf(result + sz, maxlen - sz, "%u %u ", - m->test_interval, __pg_count(m)); + m->test_interval, m->nr_priority_groups); list_for_each_entry(pg, &m->priority_groups, list) { sz += snprintf(result + sz, maxlen - sz, "%u %s %u %u ", pg->priority, pg->ps->type->name, - __path_count(pg), pg->ps->type->table_args); - - list_for_each_entry(p, &pg->valid_paths, list) { - format_dev_t(buffer, p->dev->bdev->bd_dev); - sz += snprintf(result + sz, maxlen - sz, - "%s ", buffer); - pg->ps->type->status(pg->ps, p, type, - result + sz, maxlen - sz); + pg->nr_paths, pg->ps->type->table_args); - sz = strlen(result); - if (sz >= maxlen) - break; - } - list_for_each_entry(p, &pg->invalid_paths, list) { + list_for_each_entry(p, &pg->paths, list) { format_dev_t(buffer, p->dev->bdev->bd_dev); sz += snprintf(result + sz, maxlen - sz, "%s ", buffer); @@ -866,8 +794,6 @@ break; } - spin_unlock_irqrestore(&m->path_lock, flags); - return 0; } @@ -876,7 +802,7 @@ *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 0, 1}, + .version = {1, 0, 2}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr,