Defer failing an I/O until we know that all of the paths have been tested *since* the last path failed. This patch is large because some functions were moved around. --- diff/drivers/md/dm-mpath.c 2004-02-03 11:58:50.000000000 +0000 +++ source/drivers/md/dm-mpath.c 2004-02-06 15:57:19.000000000 +0000 @@ -30,6 +30,8 @@ struct priority_group *pg; spinlock_t failed_lock; + /* FIXME: put the next 2 fields in a bitset */ + int tested; int has_failed; unsigned fail_count; unsigned fail_total; @@ -55,10 +57,14 @@ struct list_head list; struct dm_target *ti; + unsigned nr_paths; unsigned nr_priority_groups; struct list_head priority_groups; spinlock_t lock; + unsigned nr_valid_paths; + unsigned nr_tested_paths; + struct path *current_path; unsigned current_count; unsigned min_io; @@ -183,22 +189,53 @@ /*----------------------------------------------------------------- * All paths should be tested periodically. *---------------------------------------------------------------*/ +static void iterate_paths(struct multipath *m, void (*fn)(struct path *p)) +{ + struct priority_group *pg; + struct path *p; + + list_for_each_entry (pg, &m->priority_groups, list) { + list_for_each_entry (p, &pg->paths, list) + fn(p); + } +} + +static void clear_tested(struct path *p) +{ + p->tested = 0; +} + static void fail_path(struct path *path) { unsigned long flags; + struct multipath *m; spin_lock_irqsave(&path->failed_lock, flags); - if (!path->has_failed) { - /* FIXME: this is brain dead */ - if (!--path->fail_count) { - path->has_failed = 1; - path->fail_total++; - path->pg->ps->type->set_path_state(path->pg->ps, - path, 0); + /* FIXME: path->fail_count is brain dead */ + if (!path->has_failed && !--path->fail_count) { + path->has_failed = 1; + path->fail_total++; + path->pg->ps->type->set_path_state(path->pg->ps, path, 0); + path->pg->m->trigger_event = 1; + + m = path->pg->m; + spin_lock(&m->lock); + m->nr_valid_paths--; + if (!m->nr_valid_paths) { + iterate_paths(m, clear_tested); + m->nr_tested_paths 
= 0; } + spin_unlock(&m->lock); + } - path->pg->m->trigger_event = 1; + if (!path->tested) { + path->tested = 1; + + m = path->pg->m; + spin_lock(&m->lock); + m->nr_tested_paths++; + spin_unlock(&m->lock); } spin_unlock_irqrestore(&path->failed_lock, flags); @@ -207,6 +244,7 @@ static void recover_path(struct path *path) { unsigned long flags; + struct multipath *m = path->pg->m; spin_lock_irqsave(&path->failed_lock, flags); @@ -214,7 +252,11 @@ path->has_failed = 0; path->fail_count = MPATH_FAIL_COUNT; path->pg->ps->type->set_path_state(path->pg->ps, path, 1); - path->pg->m->trigger_event = 1; + m->trigger_event = 1; + + spin_lock(&m->lock); + m->nr_valid_paths++; + spin_unlock(&m->lock); } spin_unlock_irqrestore(&path->failed_lock, flags); @@ -273,26 +315,84 @@ } } -static void dispatch_failed_ios(struct multipath *m) +static int __choose_path(struct multipath *m) { + struct priority_group *pg; + struct path *path = NULL; + + if (m->nr_valid_paths) { + /* loop through the priority groups until we find a valid path. */ + list_for_each_entry (pg, &m->priority_groups, list) { + path = pg->ps->type->select_path(pg->ps); + if (path) + break; + } + } + + m->current_path = path; + m->current_count = m->min_io; + return 0; +} + +static struct path *get_current_path(struct multipath *m) +{ + struct path *path; unsigned long flags; - struct bio *bio; spin_lock_irqsave(&m->lock, flags); - bio = bio_list_get(&m->failed_ios); + + /* Do we need to select a new path? 
*/ + if (!m->current_path || --m->current_count == 0) + __choose_path(m); + + path = m->current_path; + spin_unlock_irqrestore(&m->lock, flags); - submit_ios(bio); + return path; } -static void iterate_paths(struct multipath *m, void (*fn)(struct path *p)) +static int map_io(struct multipath *m, struct bio *bio) { - struct priority_group *pg; - struct path *p; + struct path *path; - list_for_each_entry (pg, &m->priority_groups, list) { - list_for_each_entry (p, &pg->paths, list) - fn(p); + path = get_current_path(m); + if (!path) + return -EIO; + + bio->bi_bdev = path->dev->bdev; + return 0; +} + +static void dispatch_failed_ios(struct multipath *m) +{ + int r; + unsigned long flags; + struct bio *bio = NULL, *next; + + spin_lock_irqsave(&m->lock, flags); + if (m->nr_valid_paths || (m->nr_tested_paths == m->nr_paths)) + bio = bio_list_get(&m->failed_ios); + spin_unlock_irqrestore(&m->lock, flags); + + + while (bio) { + next = bio->bi_next; + bio->bi_next = NULL; + + r = map_io(m, bio); + if (r) + /* + * This won't loop forever because the + * end_io function will fail the ios if + * we've tested all the paths. + */ + bio_io_error(bio, bio->bi_size); + + else + generic_make_request(bio); + + bio = next; } } @@ -310,6 +410,7 @@ list_for_each_entry (m, &_mpaths, list) { dispatch_failed_ios(m); iterate_paths(m, test_path); + submit_ios(bio_list_get(&m->test_ios)); spin_lock_irqsave(&m->lock, flags); if (m->trigger_event) { @@ -575,9 +676,12 @@ while (as.argc) { struct priority_group *pg; pg = parse_priority_group(&as, m, ti); - if (pg) + if (pg) { + m->nr_paths += pg->nr_paths; __insert_priority_group(m, pg); + } } + m->nr_valid_paths = m->nr_paths; ti->private = m; m->ti = ti; @@ -604,53 +708,6 @@ free_multipath(m); } -static int __choose_path(struct multipath *m) -{ - struct priority_group *pg; - struct path *path = NULL; - - /* loop through the priority groups until we find a valid path. 
*/ - list_for_each_entry (pg, &m->priority_groups, list) { - path = pg->ps->type->select_path(pg->ps); - if (path) - break; - } - - m->current_path = path; - m->current_count = m->min_io; - return 0; -} - -static struct path *get_current_path(struct multipath *m) -{ - struct path *path; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - - /* Do we need to select a new path? */ - if (!m->current_path || --m->current_count == 0) - __choose_path(m); - - path = m->current_path; - - spin_unlock_irqrestore(&m->lock, flags); - - return path; -} - -static int map_io(struct multipath *m, struct bio *bio) -{ - struct path *path; - - path = get_current_path(m); - if (!path) - return -EIO; - - bio->bi_bdev = path->dev->bdev; - return 0; -} - static int multipath_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { @@ -684,29 +741,30 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio, int error, union map_info *map_context) { - int r = 0; - unsigned long flags; + struct path *path; struct multipath *m = (struct multipath *) ti->private; if (error) { - struct path *path; + spin_lock(&m->lock); + if (!m->nr_valid_paths && (m->nr_tested_paths == m->nr_paths)) { + spin_unlock(&m->lock); + return -EIO; + } + spin_unlock(&m->lock); path = find_path(m, bio->bi_bdev); fail_path(path); - r = map_io(m, bio); - if (!r) { - /* queue for the daemon to resubmit */ - spin_lock_irqsave(&m->lock, flags); - bio_list_add(&m->failed_ios, bio); - spin_unlock_irqrestore(&m->lock, flags); + /* queue for the daemon to resubmit */ + spin_lock(&m->lock); + bio_list_add(&m->failed_ios, bio); + spin_unlock(&m->lock); - dm_daemon_wake(&_kmpathd); - r = 1; /* io not complete */ - } + dm_daemon_wake(&_kmpathd); + return 1; /* io not complete */ } - return r; + return 0; } static void lock_path(struct path *p) {down(&p->test_lock);}