background path testing

--- diff/drivers/md/dm-mpath.c	2003-12-29 10:16:45.000000000 +0000
+++ source/drivers/md/dm-mpath.c	2003-12-29 10:16:52.000000000 +0000
@@ -25,18 +25,20 @@
 	struct priority_group *pg;
 	int has_failed;
-	jiffy_t fail_time;
 	atomic_t fail_count;
 	atomic_t fail_total;
 
+	struct semaphore test_lock;
 	sector_t test_sector;
+	struct bio *test_bio;
+	struct page *test_page;	/* FIXME: share this between all paths ? */
 };
 
 struct priority_group {
 	struct list_head list;
 
 	unsigned priority;
-	struct path_selector ps;
+	struct path_selector *ps;
 
 	struct list_head valid_paths;
 	struct list_head invalid_paths;
 };
@@ -56,8 +58,7 @@
 	struct bio *failed_ios;
 
 	unsigned test_interval;
-//	atomic_t suspended;	/* device suspension */
-//	int throw_event;
+	atomic_t trigger_event;
 };
 
 static struct path *alloc_path(void)
@@ -68,6 +69,20 @@
 		memset(path, 0, sizeof(*path));
 		atomic_set(&path->fail_count, 0);
 		atomic_set(&path->fail_total, 0);
+		init_MUTEX_LOCKED(&path->test_lock);	/* resume will unlock */
+
+		path->test_bio = bio_alloc(GFP_KERNEL, 1);
+		if (!path->test_bio) {
+			kfree(path);
+			return NULL;
+		}
+
+		path->test_page = alloc_page(GFP_KERNEL);
+		if (!path->test_page) {
+			bio_put(path->test_bio);
+			kfree(path);
+			return NULL;
+		}
 	}
 
 	return path;
@@ -75,6 +90,9 @@
 
 static inline void free_path(struct path *p)
 {
+	ClearPageLocked(p->test_page);
+	__free_page(p->test_page);
+	bio_put(p->test_bio);
 	kfree(p);
 }
 
@@ -146,6 +164,48 @@
 }
 
 /*-----------------------------------------------------------------
+ * All paths should be tested periodically.
+ *---------------------------------------------------------------*/
+static void __fail_path(struct path *path)
+{
+	if (path->has_failed)
+		return;
+
+	/* FIXME: this is brain dead */
+	if (!atomic_dec_and_test(&path->fail_count))
+		return;
+
+	path->has_failed = 1;
+//	path->fail_time = jiffies;
+	atomic_inc(&path->fail_total);
+	list_del(&path->list);
+	list_add(&path->list, &path->pg->invalid_paths);
+	path->pg->ps->type->set_path_state(path->pg->ps, path, 0);
+}
+
+static int test_endio(struct bio *bio, unsigned int done, int error)
+{
+	struct path *path = (struct path *) bio->bi_private;
+
+	if (bio->bi_size)
+		return 1;
+
+	if (error)
+		__fail_path(path);
+
+	up(&path->test_lock);
+	return 0;
+}
+
+static void test_path(struct path *p)
+{
+	if (down_trylock(&p->test_lock))
+		return;	/* last test io still pending */
+
+	submit_bio(READ, p->test_bio);
+}
+
+/*-----------------------------------------------------------------
  * The multipath daemon is responsible for resubmitting failed ios.
  *---------------------------------------------------------------*/
 static struct dm_daemon _kmpathd;
@@ -171,26 +231,39 @@
 	}
 }
 
-/* Requeue error ios */
-static void do_ios(void)
+static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
 {
-	struct multipath *m;
+	struct priority_group *pg;
+	struct path *p;
 
-	spin_lock(&_mpath_lock);
-	list_for_each_entry (m, &_mpaths, list)
-		dispatch_failed_ios(m);
-	spin_unlock(&_mpath_lock);
+	down_read(&m->path_lock);
+	list_for_each_entry (pg, &m->priority_groups, list) {
+		list_for_each_entry (p, &pg->valid_paths, list)
+			fn(p);
 
-	blk_run_queues();
+		list_for_each_entry (p, &pg->invalid_paths, list)
+			fn(p);
+	}
+	up_read(&m->path_lock);
 }
 
 /* Multipathd does this every time it runs, returns a sleep duration hint */
 static jiffy_t do_work(void)
 {
-	do_ios();
-//	do_table_events();
-//	return do_scrubbing();
-	return 0;
+	struct multipath *m;
+
+	spin_lock(&_mpath_lock);
+	list_for_each_entry (m, &_mpaths, list) {
+		dispatch_failed_ios(m);
+		iterate_paths(m, test_path);
+
+		if (atomic_dec_and_test(&m->trigger_event))
+			dm_table_event(m->ti->table);
+	}
+	spin_unlock(&_mpath_lock);
+
+	blk_run_queues();
+	return (jiffy_t) 0;
 }
 
@@ -241,6 +314,18 @@
 	as->argv += n;
 }
 
+static void init_test_bio(struct path *p)
+{
+	struct bio *bio = p->test_bio;
+
+	bio->bi_sector = 0;
+	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
+	bio->bi_bdev = p->dev->bdev;
+	bio->bi_end_io = test_endio;
+	bio->bi_private = p;
+	bio_add_page(bio, p->test_page, bdev_hardsect_size(p->dev->bdev), 0);
+}
+
 static struct path *parse_path(struct arg_set *as, struct path_selector *ps,
 			       struct dm_target *ti)
 {
@@ -263,6 +348,7 @@
 		ti->error = ESTR("error getting device");
 		goto bad;
 	}
+	init_test_bio(p);
 
 	r = ps->type->add_path(ps, p, as->argc, as->argv, &ti->error);
 	if (r) {
@@ -556,23 +642,6 @@
 	return NULL;
 }
 
-static void __fail_path(struct path *path)
-{
-	if (path->has_failed)
-		return;
-
-	if (!atomic_dec_and_test(&path->fail_count))
-		return;
-
-	path->has_failed = 1;
-	path->fail_time = jiffies;
-	atomic_inc(&path->fail_total);
-//	path->test_sector = sector;
-	list_del(&path->list);
-	list_add(&path->list, &path->pg->invalid_paths);
-	path->pg->ps.type->set_path_state(&path->pg->ps, path, 0);
-}
-
 static int __resubmit_io(struct multipath *m, struct bio *bio)
 {
 	int r;
@@ -615,7 +684,21 @@
 	return r;
 }
 
-/* Multipath status */
+static void lock_path(struct path *p) { down(&p->test_lock); }
+static void unlock_path(struct path *p) { up(&p->test_lock); }
+
+static void multipath_suspend(struct dm_target *ti)
+{
+	struct multipath *m = (struct multipath *) ti->private;
+	iterate_paths(m, lock_path);
+}
+
+static void multipath_resume(struct dm_target *ti)
+{
+	struct multipath *m = (struct multipath *) ti->private;
+	iterate_paths(m, unlock_path);
+}
+
 static int multipath_status(struct dm_target *ti, status_type_t type,
 			    char *result, unsigned int maxlen)
 {
@@ -683,266 +766,6 @@
 MODULE_AUTHOR("Sistina software ");
 MODULE_LICENSE("GPL");
-
-
-
-
-
-
-
-
-
-
-
-
-
-#ifdef SCRUB_STUFF
-/* Reset failure information on a path */
-static inline void reset_failures(struct path *path)
-{
-	struct path_selector *ps = &path->m->ps;
-
-	path->test_sector = 0;
-	atomic_set(&path->fail, path->fail_limit);
-	clear_bit(FAILED, &path->flags);
-	ps->type->set_path_state(path->ps_private, 0);
-}
-
-/* Reset a "failed" path
- * (IOW: set it to operational so that it can be selected for IO submission)
- */
-static void reset_failed(struct multipath_io *io)
-{
-	struct path *path = io->path;
-
-	if (is_failed(path)) {
-		reset_failures(path);
-		queue_table_event(io);
-	}
-}
-
-/* Scrub IO handling */
-static inline void reset_scrub_io(struct path *path)
-{
-	clear_bit(SCRUB_IO, &path->flags);
-}
-
-
-/* Scrub timeout calculation */
-static inline unsigned long get_reactivation_timeout(struct path *path)
-{
-	return path->reactivation_interval * HZ;
-}
-
-static inline unsigned long get_scrub_timeout(struct path *path)
-{
-	return path->m->scrub_interval * HZ;
-}
-
-/* Calculate scrubbing sleep timeout for deamon */
-static int scrub_timeout(struct path *path, long *timeout)
-{
-	int ret = 0;
-	jiffy_t j = get_fail_time(path);
-	jiffy_t t = is_failed(path) ? get_reactivation_timeout(path) :
-				      get_scrub_timeout(path);
-
-	if (t) {
-		/* Jiffies wrap around check */
-		if (jiffies < j) {
-			*timeout = HZ;
-			set_fail_time(path);
-			return 1;
-		}
-
-		j += t;
-		if (jiffies < j)
-			j -= jiffies;
-		else {
-			j = t;
-			ret = 1;
-		}
-
-		if (*timeout > j)
-			*timeout = (long) j;
-	}
-
-	return ret;
-}
-
-/* Allocate a scrubing IO buffer_head and page */
-static struct buffer_head *alloc_scrub_bh(void)
-{
-	struct buffer_head *bh = kmalloc(sizeof(*bh), GFP_NOIO);
-
-	if (bh) {
-		memset(bh, 0, sizeof(*bh));
-
-		/* Well, might be (a little) too large but it's easy */
-		bh->b_page = alloc_page(GFP_NOIO);
-		if (!bh->b_page) {
-			kfree(bh);
-			return NULL;
-		}
-
-		LockPage(bh->b_page);
-		set_bit(BH_Lock, &bh->b_state);
-		set_bit(BH_Mapped, &bh->b_state);
-		bh->b_data = page_address(bh->b_page);
-		bh->b_size = PAGE_SIZE;
-	}
-
-	return bh;
-}
-
-/* Free a scrubing IO page and buffer_head */
-static void free_scrub_bh(struct buffer_head *bh)
-{
-	UnlockPage(bh->b_page);
-	__free_page(bh->b_page);
-	kfree(bh);
-}
-
-/* Scrubbing end IO function */
-static void multipath_scrub_end_io(struct buffer_head *bh, int uptodate)
-{
-	struct multipath *m = (struct multipath *) io->m;
-
-	if (uptodate) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&m->lock, flags);
-		reset_failed(io);
-		spin_unlock_irqrestore(&m->lock, flags);
-
-		dm_daemon_wake(&_kmultipathd);
-	}
-
-	reset_scrub_io(io->path);
-	free_scrub_bh(io->bh);
-}
-
-/*
- * Queue a test read IO to a path (path scrubbing)
- *
- * Returns
- *
- * 0: scrub IO already in progress or error (retry later)
- * 1: scrub IO queued
- *
- */
-static int queue_scrub_io(struct path *path)
-{
-	struct multipath_io *io;
-	struct buffer_head *bh;
-
-	if (test_and_set_bit(SCRUB_IO, &path->flags))
-		goto out;
-
-	bh = alloc_scrub_bh();
-	if (!bh)
-		goto retry;	/* just retry later */
-
-	/*
-	 * No need to set b_dev, b_blocknr, b_count or initialize
-	 * the wait queue here.
-	 */
-	bh->b_rdev = path->dev->dev;
-	bh->b_rsector = path->test_sector;
-	bh->b_end_io = multipath_scrub_end_io;
-	bh->b_private = io;
-
-	make_request(io);
-	run_task_queue(&tq_disk);
-
-	return 1;
-
-retry:
-	reset_scrub_io(path);
-
-out:
-	return 0;
-}
-
-/*
- * Check if paths need to get a test io queued either for
- * automatic failure recovery or scrubbing of idle paths.
- */
-static long do_scrubbing(void)
-{
-	unsigned long flags;
-	long timeout = MAX_SCHEDULE_TIMEOUT;
-	struct multipath *m;
-
-	/* FIXME: optimize this in case no scrubbing is needed */
-	spin_lock_irqsave(&_mpath_lock, flags);
-	list_for_each_entry (m, &_mpaths, list) {
-		struct path *path;
-
-		/* Don't scrub suspended ms */
-		if (atomic_read(&m->suspended))
-			continue;
-
-		list_for_each_entry (path, &m->paths, list) {
-			if (scrub_timeout(path, &timeout))
-				queue_scrub_io(path);
-		}
-	}
-	spin_unlock_irqrestore(&_mpath_lock, flags);
-
-	return timeout;
-}
-
-static void wait_for_scrub_ios(struct multipath *m)
-{
-	struct path *path;
-
-	list_for_each_entry (path, &m->paths, list) {
-		while (test_bit(SCRUB_IO, &path->flags))
-			schedule_timeout(HZ / 2);
-	}
-}
-
-
-#endif
-
-
-
-
-#ifdef EVENT_STUFF
-/* "Queue" an event on a table in order to process
-   dm_table_event() calls in task context */
-static inline void queue_table_event(struct multipath_io *io)
-{
-	struct multipath *m = (struct multipath *) io->m;
-
-	atomic_inc(&m->events);
-}
-
-/* Work all table events thrown */
-static void do_table_events(void)
-{
-	unsigned long flags;
-	struct multipath *m;
-
-	/* FIXME: optimize this in case no events need to be thrown
-	   (which is most of the time) */
-	spin_lock_irqsave(&_mpath_lock, flags);
-	list_for_each_entry (m, &_mpaths, list) {
-
-		/* Throw all events queued */
-		while (atomic_read(&m->events)) {
-			dm_table_event(m->ti->table);
-			atomic_dec(&m->events);
-		}
-	}
-	spin_unlock_irqrestore(&_mpath_lock, flags);
-}
-
-
-#endif
-
-
 #ifdef STATUS_FOO
 	int sz = 0;
@@ -1002,26 +825,3 @@
 	return 0;
 
 #endif
-
-
-
-#if 0
-/* Suspend */
-static void multipath_suspend(struct dm_target *ti)
-{
-	struct multipath *m = (struct multipath *) ti->private;
-
-	//atomic_set(&m->suspended, 1);
-	//wait_for_scrub_ios(m);
-}
-
-/* Resume */
-static void multipath_resume(struct dm_target *ti)
-{
-	struct multipath *m = (struct multipath *) ti->private;
-
-	//atomic_set(&m->suspended, 0);
-	dm_daemon_wake(&_kmpathd);
-}
-
-#endif
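
Note on the locking scheme: each path's test_lock is allocated locked,
multipath_resume() releases it, and the daemon only submits a fresh test io
when the previous one has completed (test_endio() does the up()). Below is a
minimal userspace sketch of that discipline, with POSIX semaphores standing in
for the kernel's struct semaphore; all names here are illustrative and not
part of the patch.

/* test_lock.c -- illustrative only.  Build: cc test_lock.c -o test_lock -lpthread */
#include <semaphore.h>
#include <stdio.h>

struct path {
	sem_t test_lock;	/* held while a test io is in flight */
};

/* like alloc_path(): created locked, "resume" will unlock */
static void path_init(struct path *p)
{
	sem_init(&p->test_lock, 0, 0);
}

/* like test_endio(): io completion releases the lock */
static void test_endio(struct path *p)
{
	sem_post(&p->test_lock);
}

/* like test_path(): skip this round if the last test io is still pending */
static void test_path(struct path *p)
{
	if (sem_trywait(&p->test_lock)) {
		printf("test io still pending, skipping\n");
		return;
	}
	printf("submitting test io\n");
	test_endio(p);		/* in the kernel this happens asynchronously */
}

int main(void)
{
	struct path p;

	path_init(&p);
	test_path(&p);		/* skipped: path still "suspended" */
	sem_post(&p.test_lock);	/* like multipath_resume() */
	test_path(&p);		/* now submits */
	return 0;
}

The same primitive gives suspend/resume for free: multipath_suspend() simply
takes every test_lock, which both waits for in-flight test ios and prevents
new ones until resume.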