Import the 2.4 multipath code (no porting as yet).

--- diff/drivers/md/dm-latency-ps.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-latency-ps.c	2003-11-26 10:20:34.000000000 +0000
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2003 Sistina Software.
+ *
+ * Module Author: Heinz Mauelshagen
+ *
+ * This file is released under the GPL.
+ *
+ *
+ * "Latency" Path Selector
+ *
+ * Returns the path with the minimum latency within the same priority
+ * group as the one handed in, unless that path has failed.
+ * If all paths in the priority group have failed, returns the first
+ * operational one in the group with the next lower priority.
+ *
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+#include <linux/slab.h>
+
+/* Path selector context */
+struct latency_c {
+	struct list_head paths;		/* List of operational paths */
+	struct list_head failed_paths;	/* List of failed paths */
+
+	spinlock_t lock;
+
+	int prio_group;		/* Current priority group to select
+				   a path from */
+};
+
+/* Path info */
+struct path_c {
+	struct list_head list;	/* Linked list to latency_c */
+
+	spinlock_t lock;
+
+	struct path *path;	/* Opaque pointer to caller path info */
+	struct latency_c *lc;	/* Back pointer to latency context */
+
+	/* Set by add_path() arguments */
+	int priority;
+	int queue_min;
+
+	/* Internal use */
+	int io_count;
+	unsigned long long latency;
+};
+
+/* Allocate latency context */
+static struct latency_c *_alloc_latency_c(void)
+{
+	struct latency_c *lc = kmalloc(sizeof(*lc), GFP_KERNEL);
+
+	if (lc) {
+		INIT_LIST_HEAD(&lc->paths);
+		INIT_LIST_HEAD(&lc->failed_paths);
+		lc->lock = SPIN_LOCK_UNLOCKED;
+		lc->prio_group = -1;
+	}
+
+	return lc;
+}
+
+/* Allocate path context */
+static struct path_c *_alloc_path_c(void)
+{
+	struct path_c *pc = kmalloc(sizeof(*pc), GFP_KERNEL);
+
+	if (pc) {
+		memset(pc, 0, sizeof(*pc));
+		pc->lock = SPIN_LOCK_UNLOCKED;
+	}
+
+	return pc;
+}
+
+/* Path selector constructor */
+static int latency_ctr(struct path_selector *ps,
+		       int argc, char **argv, char **error)
+{
+	struct latency_c *lc;
+
+	if (argc) {
+		*error = "latency path selector: Invalid number "
+			 "of arguments";
+		return -EINVAL;
+	}
+
+	lc = _alloc_latency_c();
+	if (!lc) {
+		*error = "latency path selector: Error allocating context";
+		return -ENOMEM;
+	}
+
+	ps->context = (void *) lc;
+
+	return 0;
+}
+
+/* Path selector destructor */
+static void latency_dtr(struct path_selector *ps)
+{
+	struct latency_c *lc = (struct latency_c *) ps->context;
+	struct list_head *lists[] = {
+		&lc->paths,
+		&lc->failed_paths,
+	};
+	int i = ARRAY_SIZE(lists);
+
+	spin_lock(&lc->lock);
+	while (i--) {
+		struct list_head *elem, *tmp;
+
+		list_for_each_safe(elem, tmp, lists[i]) {
+			struct path_c *pc =
+				list_entry(elem, struct path_c, list);
+
+			list_del(elem);
+			kfree(pc);
+		}
+	}
+	spin_unlock(&lc->lock);
+
+	kfree(lc);
+	ps->context = NULL;
+}
+
+/* Path add */
+#define xx(a, s, c, v) \
+	{ \
+		int tmp; \
+		if (sscanf(argv[a], "%d", &tmp) != 1 || \
+		    tmp < c ## _MIN || \
+		    tmp > c ## _MAX) { \
+			*error = "latency path selector: Invalid " s; \
+			return NULL; \
+		} \
+		v = tmp; \
+	}
+
+#define PRIORITY_MIN	0
+#define PRIORITY_MAX	(1024*1024)
+#define QUEUE_MIN	0
+#define QUEUE_MAX	(1024*1024)
+static void *latency_add_path(struct path_selector *ps, struct path *path,
+			      int argc, char **argv, char **error)
+{
+	struct latency_c *lc = (struct latency_c *) ps->context;
+	struct path_c *pc;
+
+	if (argc != 2) {
+		*error = "latency path selector: Invalid number of arguments";
+		return NULL;
+	}
+
+	pc = _alloc_path_c();
+	if (!pc) {
+		*error = "latency path selector: "
+			 "Error allocating path context";
"latency path selector: Error allocating path context"; + return NULL; + } + + pc->path = path; + pc->lc = lc; + xx(0, "priority", PRIORITY, pc->priority); + xx(1, "queue min", QUEUE, pc->queue_min); + pc->io_count = pc->queue_min; + spin_lock(&lc->lock); + list_add_tail(&pc->list, &lc->paths); + spin_unlock(&lc->lock); + + return (void *) pc; +} +#undef xx + +/* Path set state */ +static void latency_set_path_state(void *ps_private, unsigned long state) +{ + unsigned long flags; + struct path_c *path = (struct path_c *) ps_private; + struct latency_c *lc = path->lc; + + spin_lock_irqsave(&lc->lock, flags); + /* Fail path */ + if (state) + list_move_tail(&path->list, &lc->failed_paths); + else { + list_move_tail(&path->list, &lc->paths); + list_for_each_entry(path, &lc->paths, list) + path->latency = 0; + } + spin_unlock_irqrestore(&lc->lock, flags); +} + +/* Helper function path selector */ +static struct path_c *_path(struct latency_c *lc) +{ + struct path_c *path, *high_path = NULL, *ret = NULL; + int high_prio = INT_MAX; + unsigned long long latency = ~0ULL; + + /* Any operational paths ? */ + list_for_each_entry(path, &lc->paths, list) { + /* Find path with highest riority */ + if (high_prio > path->priority) { + high_prio = path->priority; + high_path = path; + } + + /* Skip paths which aren't members of this priority group */ + if (path->priority != lc->prio_group) + continue; + + /* Ensure minimum IO queue */ + if (path->io_count) { + path->io_count--; + ret = path; + break; + } + + /* Select path with less/equal latency */ + if (path->latency <= latency) { + latency = path->latency; + ret = path; + break; + } + } + + /* There's still at least one with this group priority */ + if (ret) { + if (!path->io_count) + path->io_count = path->queue_min; + /* None with this group priority available, + try another priority group */ + } else if (high_path) { + lc->prio_group = high_prio; + ret = high_path; + } + + return ret; +} + +/* Path selector */ +static struct path *latency_select_path(struct path_selector *ps, + struct buffer_head *bh, int rw, + struct path_info *path_context) +{ + unsigned long flags; + struct latency_c *lc = (struct latency_c *) ps->context; + struct path_c *path; + + spin_lock_irqsave(&lc->lock, flags); + path = _path(lc); + spin_unlock_irqrestore(&lc->lock, flags); + + if (path) { + path_context->ll = jiffies; + path_context->ptr = (void *) path; + return path->path; /* Return opaque caller path */ + } + + return NULL; +} + +/* Path end IO */ +static void latency_endio(struct path_selector *ps, + struct buffer_head *bh, + int rw, int error, + struct path_info *path_context) +{ + unsigned long flags; + typeof(jiffies) j; + struct path_c *path = path_context->ptr; + + BUG_ON(!path); + + j = jiffies - path_context->ll; + + /* Put heavy weight on long latencies */ + j *= j; + j *= j; + + spin_lock_irqsave(&path->lock, flags); + path->latency += j; + spin_unlock_irqrestore(&path->lock, flags); +} + +/* Path status */ +static int latency_status(void *context, status_type_t type, + char *result, unsigned int maxlen) +{ + struct path_c *path = + (struct path_c *) context; + + switch(type) { + case STATUSTYPE_INFO: + snprintf(result, maxlen, "%llu ", path->latency); + break; + + case STATUSTYPE_TABLE: + snprintf(result, maxlen, "%d %d ", + path->priority, path->queue_min); + break; + } + + return 0; +} + +static struct path_selector_type latency_ps = { + .name = "latency", + .ctr = latency_ctr, + .dtr = latency_dtr, + .add_path = latency_add_path, + .set_path_state = 
+	.select_path = latency_select_path,
+	.endio = latency_endio,
+	.status = latency_status,
+};
+
+int dm_register_latency_ps(void) {
+	return dm_register_path_selector(&latency_ps);
+}
+
+void dm_unregister_latency_ps(void) {
+	dm_unregister_path_selector(&latency_ps);
+}

--- diff/drivers/md/dm-mpath.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-mpath.c	2003-11-26 10:20:34.000000000 +0000
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (C) 2003 Sistina Software Limited.
+ *
+ * Module Author: Heinz Mauelshagen
+ *
+ * This file is released under the GPL.
+ *
+ * device-mapper multipathing target
+ *
+ */
+
+#include "dm.h"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/time.h>
+#include "dm-path-selector.h"
+
+/* Multipath context */
+struct multipath_c {
+	struct list_head list;
+
+	struct list_head paths;		/* List of paths */
+	struct list_head io_jobs;	/* IO jobs */
+
+	struct path_selector ps;
+	struct dm_target *ti;
+
+	spinlock_t lock;	/* Lock access to this context */
+	atomic_t io_count;	/* IOs in flight for this context */
+
+	unsigned int scrub_interval;	/* Set in constructor */
+
+	atomic_t events;	/* # of table events to throw */
+	atomic_t suspended;	/* device suspension */
+};
+
+/* Multipath io job */
+struct path;
+struct multipath_io {
+	struct list_head list;
+
+	struct multipath_c *mc;
+	struct path *path;
+
+	struct buffer_head *bh;
+	int rw;
+
+	/* Path selector context between ps->type->select_path()
+	   and ps->type->endio() */
+	struct path_info path_context;
+};
+
+
+/* Path flags */
+enum {
+	FAILED,
+	SCRUB_IO,
+};
+
+/* Path properties */
+struct path {
+	struct list_head list;
+
+	struct dm_dev *dev;
+	struct multipath_c *mc;		/* Back pointer to multipath context */
+
+	unsigned long flags;		/* See path flags above */
+
+	/* set in target ctr */
+	int reactivation_interval;	/* Automatic reactivation interval */
+	int fail_max;			/* Maximum failures allowed */
+
+	typeof(jiffies) io_jiffies;	/* Jiffies of last IO queued */
+	atomic_t fail;			/* actual failure count vs. fail_max */
+	atomic_t fail_total;		/* Total failures on this path */
+
+	void *ps_private;	/* Opaque pointer to path selector object */
+	unsigned long test_sector;	/* Path scrubbing sector */
+};
+
+/*
+ * Various functions to set a single/all path(s) (in)operational,
+ * check if path(s) is/are operational and (un)fail a path, allocate
+ * and deallocate io job memory...
+ */
+
+/* Set/retrieve jiffies of last IO on this path */
+static inline void _set_io_jiffies(struct path *path)
+{
+	path->io_jiffies = jiffies;
+}
+
+static inline typeof(jiffies) _get_io_jiffies(struct path *path)
+{
+	return path->io_jiffies;
+}
+
+/* "Queue" an event on a table in order to process
+   dm_table_event() calls in task context */
+static inline void _queue_table_event(struct multipath_io *io)
+{
+	struct multipath_c *mc = (struct multipath_c *) io->mc;
+
+	atomic_inc(&mc->events);
+}
+
+/* Check path failed */
+static inline int _is_failed(struct path *path)
+{
+	return test_bit(FAILED, &path->flags);
+}
+
+/* Set a path to "failed" */
+static inline void _set_failed(struct multipath_io *io)
+{
+	struct path *path = io->path;
+	struct path_selector *ps = &path->mc->ps;
+
+	/* Mark the path failed; bail out if it already was
+	   (the plain _is_failed() check never set the bit,
+	   so _is_failed() could never return true) */
+	if (test_and_set_bit(FAILED, &path->flags))
+		return;
+
+	atomic_inc(&path->fail_total);
+	io->path->test_sector = io->bh->b_rsector;
+	ps->type->set_path_state(path->ps_private, 1);
+	_queue_table_event(io);
+}
+
+/* Reset failure information on a path */
+static inline void _reset_failures(struct path *path)
+{
+	struct path_selector *ps = &path->mc->ps;
+
+	path->test_sector = 0;
+	atomic_set(&path->fail, path->fail_max);
+	clear_bit(FAILED, &path->flags);
+	ps->type->set_path_state(path->ps_private, 0);
+}
+
+/* Reset a "failed" path
+ * (IOW: set it to operational so that it can be selected for IO submission)
+ */
+static inline void _reset_failed(struct multipath_io *io)
+{
+	struct path *path = io->path;
+
+	if (_is_failed(path)) {
+		_reset_failures(path);
+		_queue_table_event(io);
+	}
+}
+
+/* Scrub IO handling */
+static inline void _reset_scrub_io(struct path *path)
+{
+	clear_bit(SCRUB_IO, &path->flags);
+}
+
+
+/* Scrub timeout calculation */
+static inline unsigned long _get_reactivation_timeout(struct path *path)
+{
+	return path->reactivation_interval * HZ;
+}
+
+static inline unsigned long _get_scrub_timeout(struct path *path)
+{
+	return path->mc->scrub_interval * HZ;
+}
+
+/* Calculate scrubbing sleep timeout for daemon */
+static inline int _scrub_timeout(struct path *path, long *timeout)
+{
+	int ret = 0;
+	typeof(jiffies) j = _get_io_jiffies(path);
+	typeof(jiffies) t = _is_failed(path) ?
+			    _get_reactivation_timeout(path) :
+			    _get_scrub_timeout(path);
+
+	if (t) {
+		/* Jiffies wrap around check */
+		if (jiffies < j) {
+			*timeout = HZ;
+			_set_io_jiffies(path);
+			return 1;
+		}
+
+		j += t;
+		if (jiffies < j)
+			j -= jiffies;
+		else {
+			j = t;
+			ret = 1;
+		}
+
+		if (*timeout > j)
+			*timeout = (long) j;
+	}
+
+	return ret;
+}
+
+/* Push a job onto the tail of a job queue */
+static inline void push(struct list_head *joblist,
+			struct list_head *job,
+			spinlock_t *lock)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	list_add_tail(job, joblist);
+	spin_unlock_irqrestore(lock, flags);
+}
+
+/* Pop an IO job off a job queue */
+static inline struct multipath_io *pop(struct list_head *jobs,
+				       spinlock_t *lock)
+{
+	unsigned long flags;
+	struct multipath_io *io;
+
+	spin_lock_irqsave(lock, flags);
+	if (list_empty(jobs))
+		io = NULL;
+	else {
+		io = list_entry(jobs->next, struct multipath_io, list);
+		list_del(jobs->next);
+	}
+	spin_unlock_irqrestore(lock, flags);
+
+	return io;
+}
+
+
+/*
+ * IO job allocation/deallocation
+ */
+
+/* Slab for the io jobs */
+static kmem_cache_t *_multipath_cache;
+static mempool_t *_multipath_pool;
+
+static int ios = 0;
+#define DEFAULT_IOS	256
+#define MIN_IOS		16
+#define MAX_IOS		32768	/* maximum on 32 bit hw with mempool_create */
+
+static inline struct multipath_io *alloc_io(void)
+{
+	return mempool_alloc(_multipath_pool, GFP_NOIO);
+}
+
+static inline void free_io(struct multipath_io *io)
+{
+	mempool_free(io, _multipath_pool);
+}
+
+/* Multipath context allocation */
+static inline struct multipath_c *_alloc_context(void)
+{
+	struct multipath_c *mc = kmalloc(sizeof(*mc), GFP_KERNEL);
+
+	if (mc) {
+		memset(mc, 0, sizeof(*mc));
+		INIT_LIST_HEAD(&mc->io_jobs);
+		INIT_LIST_HEAD(&mc->paths);
+		mc->lock = SPIN_LOCK_UNLOCKED;
+		atomic_set(&mc->io_count, 0);
+		atomic_set(&mc->events, 0);
+		atomic_set(&mc->suspended, 0);
+	}
+
+	return mc;
+}
+
+/* Path context allocation */
+static inline struct path *_alloc_path(void)
+{
+	struct path *path = kmalloc(sizeof(*path), GFP_KERNEL);
+
+	if (path) {
+		memset(path, 0, sizeof(*path));
+		atomic_set(&path->fail_total, 0);
+	}
+
+	return path;
+}
+
+static void _free_context(struct multipath_c *mc)
+{
+	struct list_head *elem, *tmp;
+	struct path_selector *ps;
+
+	if (!mc)
+		return;
+
+	/* May be called from the ctr error path before the
+	   path selector is fully set up */
+	ps = &mc->ps;
+	if (ps->type) {
+		if (ps->context)
+			ps->type->dtr(ps);
+		dm_put_path_selector(ps->type);
+	}
+
+	list_for_each_safe(elem, tmp, &mc->paths) {
+		struct path *path = list_entry(elem, struct path, list);
+
+		list_del(elem);
+		if (path->dev)
+			dm_put_device(mc->ti, path->dev);
+
+		kfree(path);
+	}
+
+	kfree(mc);
+}
+
+/********************************************************************
+ *
+ * Daemon (FIXME: use dm-daemon.c)
+ */
+
+static LIST_HEAD(_mc_jobs);
+static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_WAIT_QUEUE_HEAD(_multipathd_queue);
+
+/* Wake daemon up */
+static inline void wake_multipathd(void)
+{
+	wake_up_interruptible(&_multipathd_queue);
+}
+
+/* Submit an IO and store the IO timestamp */
+static inline void _make_request(struct multipath_io *io)
+{
+	_set_io_jiffies(io->path);
+	generic_make_request(io->rw, io->bh);
+}
+
+/* Requeue errored ios */
+static inline void _do_ios(void)
+{
+	unsigned long flags;
+	struct multipath_c *mc;
+	struct multipath_io *io;
+
+	spin_lock_irqsave(&_job_lock, flags);
+	list_for_each_entry(mc, &_mc_jobs, list) {
+		while ((io = pop(&mc->io_jobs, &mc->lock)))
+			_make_request(io);
+	}
+	spin_unlock_irqrestore(&_job_lock, flags);
+
+	run_task_queue(&tq_disk);
+}
+
+/* Process all queued table events */
+static inline void _do_table_events(void)
+{
+	unsigned long flags;
+	struct multipath_c *mc;
+
+	/* FIXME: optimize this in case no events need to be thrown
+	   (which is most of the time) */
+	spin_lock_irqsave(&_job_lock, flags);
+	list_for_each_entry(mc, &_mc_jobs, list) {
+		/* Throw all events queued */
+		while (atomic_read(&mc->events)) {
+			dm_table_event(mc->ti->table);
+			atomic_dec(&mc->events);
+		}
+	}
+	spin_unlock_irqrestore(&_job_lock, flags);
+}
+
+/* Allocate a scrubbing IO buffer_head and page */
+static inline struct buffer_head *_alloc_scrub_bh(void)
+{
+	struct buffer_head *bh = kmalloc(sizeof(*bh), GFP_NOIO);
+
+	if (bh) {
+		memset(bh, 0, sizeof(*bh));
+
+		/* Well, might be (a little) too large but it's easy */
+		bh->b_page = alloc_page(GFP_NOIO);
+		if (!bh->b_page) {
+			kfree(bh);
+			return NULL;
+		}
+
+		LockPage(bh->b_page);
+		set_bit(BH_Lock, &bh->b_state);
+		set_bit(BH_Mapped, &bh->b_state);
+		bh->b_data = page_address(bh->b_page);
+		bh->b_size = PAGE_SIZE;
+	}
+
+	return bh;
+}
+
+/* Free a scrubbing IO page and buffer_head */
+static inline void _free_scrub_bh(struct buffer_head *bh)
+{
+	UnlockPage(bh->b_page);
+	__free_page(bh->b_page);
+	kfree(bh);
+}
+
+/* Scrubbing end IO function */
+static void multipath_scrub_end_io(struct buffer_head *bh, int uptodate)
+{
+	struct multipath_io *io = (struct multipath_io *) bh->b_private;
+	struct multipath_c *mc = (struct multipath_c *) io->mc;
+
+	if (uptodate) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&mc->lock, flags);
+		_reset_failed(io);
+		spin_unlock_irqrestore(&mc->lock, flags);
+
+		wake_multipathd();
+	}
+
+	_reset_scrub_io(io->path);
+	_free_scrub_bh(io->bh);
+	free_io(io);
+}
+
+/*
+ * Queue a test read IO to a path (path scrubbing)
+ *
+ * Returns
+ *
+ * 0: scrub IO already in progress or error (retry later)
+ * 1: scrub IO queued
+ *
+ */
+static inline int _queue_scrub_io(struct path *path)
+{
+	struct multipath_io *io;
+	struct buffer_head *bh;
+
+	if (test_and_set_bit(SCRUB_IO, &path->flags))
+		goto out;
+
+	bh = _alloc_scrub_bh();
+	if (!bh)
+		goto retry;	/* just retry later */
+
+	/* Setup io */
+	io = alloc_io();
+
+	io->mc = path->mc;
+	io->path = path;
+	io->bh = bh;
+	io->rw = READ;
+
+	/* no need to set b_dev, b_blocknr, b_count
+	   or initialize the wait queue here */
+	bh->b_rdev = path->dev->dev;
+	bh->b_rsector = path->test_sector;
+	bh->b_end_io = multipath_scrub_end_io;
+	bh->b_private = io;
+
+	_make_request(io);
+	run_task_queue(&tq_disk);
+
+	return 1;
+
+retry:
+	_reset_scrub_io(path);
+
+out:
+	return 0;
+}
+
+/* Check if paths need to get a test io queued either for
+   automatic failure recovery or scrubbing of idle paths */
+static inline long _do_scrubbing(void)
+{
+	unsigned long flags;
+	long timeout = MAX_SCHEDULE_TIMEOUT;
+	struct multipath_c *mc;
+
+	/* FIXME: optimize this in case no scrubbing is needed */
+	spin_lock_irqsave(&_job_lock, flags);
+	list_for_each_entry(mc, &_mc_jobs, list) {
+		struct path *path;
+
+		/* Don't scrub suspended mcs */
+		if (atomic_read(&mc->suspended))
+			continue;
+
+		list_for_each_entry(path, &mc->paths, list) {
+			if (_scrub_timeout(path, &timeout))
+				_queue_scrub_io(path);
+		}
+	}
+	spin_unlock_irqrestore(&_job_lock, flags);
+
+	return timeout;
+}
+
+/* Multipathd does this every time it runs */
+static inline long _do_work(void)
+{
+	_do_ios();
+	_do_table_events();
+	return _do_scrubbing();
+}
+
+static DECLARE_MUTEX(_start_lock);
+static DECLARE_MUTEX(_run_lock);
+static unsigned long _multipathd_flags = 0;
+#define RUN	1
+
+/* The multipath daemon core */
+static int multipathd(void *arg)
+{
+	DECLARE_WAITQUEUE(wq, current);
+
+	set_current_state(TASK_RUNNING);
+	daemonize();
+	strcpy(current->comm, "multipathd");
+
+	add_wait_queue(&_multipathd_queue, &wq);
+
+	down(&_run_lock);
+	up(&_start_lock);
+
+	while (test_bit(RUN, &_multipathd_flags)) {
+		long timeout = _do_work();
+
+		if (timeout)
+			interruptible_sleep_on_timeout(&_multipathd_queue,
+						       timeout);
+	}
+
+	remove_wait_queue(&_multipathd_queue, &wq);
+
+	up(&_run_lock);
+
+	return 0;
+}
+
+static inline int start_daemon(void)
+{
+	static pid_t pid = 0;
+
+	down(&_start_lock);
+
+	set_bit(RUN, &_multipathd_flags);
+	pid = kernel_thread(multipathd, NULL, 0);
+	if (pid <= 0) {
+		DMERR("Failed to start multipathd thread");
+		up(&_start_lock);	/* don't leave the mutex held */
+		return -EAGAIN;
+	}
+
+	/* Wait for the daemon to up this mutex */
+	down(&_start_lock);
+	up(&_start_lock);
+
+	return 0;
+}
+
+static inline void stop_daemon(void)
+{
+	clear_bit(RUN, &_multipathd_flags);
+	wake_multipathd();
+
+	/* Wait for thread exit (Don't need to up mutex. We exit anyway) */
+	down(&_run_lock);
+}
+/* End daemon code */
+
+
+#define ARG_FORMAT	"%d"
+
+/* range checks for target definition in _get_path() */
+#define PARM_MIN	0		/* minimum parameters */
+#define PARM_MAX	1024		/* maximum " */
+
+#define PATH_PARM_MIN	2		/* min path parameters */
+#define PATH_PARM_MAX	2		/* max " */
+
+#define SCRUB_MIN	1		/* min scrubbing interval in seconds */
+#define SCRUB_MAX	(24*60*60)	/* max " */
+
+/* Number of paths */
+#define PATHS_MIN	2		/* min number of paths */
+#define PATHS_MAX	1024		/* max " */
+
+#define xx(av, a, s, c, v) \
+	if (sscanf(av[a], ARG_FORMAT, &tmp) != 1 || \
+	    tmp < c ## _MIN || \
+	    tmp > c ## _MAX) { \
+		_free_context(mc); \
+		ti->error = "dm-multipath: Invalid " s; \
+		return -EINVAL; \
+	} \
+	v = tmp;
+
+/*
+ * Parse a
+ *
+ * <num_paths> <num_path_parms> <scrub_interval>
+ * <path_selector_name> <num_ps_parms>
+ * [<device_path> <reactivation_interval> <max_failures>
+ *  <ps_parms>...]{2,num_paths}
+ *
+ * parameter set and construct a multipath context
+ *
+ */
+#define MIN_PARMS	5
+static int multipath_ctr(struct dm_target *ti, unsigned int argc,
+			 char **argv)
+{
+	int a, parms, paths, path_parms, scrub_interval, ps_parms, tmp;
+	char **av;
+	struct multipath_c *mc = NULL;
+	struct path_selector_type *pst;
+	struct path *path;
+
+	if (argc < MIN_PARMS)	/* Check minimum argument count */
+		goto bad_parms;
+
+	xx(argv, 0, "number of paths", PATHS, paths);
+	if (paths < 2)
+		goto bad_paths;
+	xx(argv, 1, "number of path parameters", PATH_PARM, path_parms);
+	xx(argv, 2, "path scrubbing interval", SCRUB, scrub_interval);
+	xx(argv, 4, "path selector parameters", PARM, ps_parms);
+
+	/* Per-path argument count: device + path parms + ps parms */
+	parms = 1 + path_parms + ps_parms;
+	if (MIN_PARMS + paths * parms != argc)
+		goto bad_parms;
+
+	mc = _alloc_context();
+	if (!mc)
+		goto bad_context;
+
+	pst = dm_get_path_selector(argv[3]);
+	if (!pst)
+		goto bad_ps;
+
+	/* Attach the selector type before it is used below */
+	mc->ps.type = pst;
+
+	if (pst->ctr(&mc->ps, 0, NULL, &ti->error))
+		goto bad_ps_ctr;
+
+	mc->scrub_interval = scrub_interval;
+
+	/* Loop through all paths parsing their parameters */
+	av = &argv[MIN_PARMS];
+	for (a = MIN_PARMS; a < argc; a += parms, av += parms) {
+		void *path_c;
+
+		path = _alloc_path();
+		if (!path)
+			goto bad_alloc_path;
+
+		/* Add path to the list first, so that _free_context()
+		   is able to free it on error */
+		list_add_tail(&path->list, &mc->paths);
+
+		xx(av, 1, "path reactivation interval", PARM,
+		   path->reactivation_interval);
+		xx(av, 2, "maximum path failures", PARM, path->fail_max);
+
+		if (dm_get_device(ti, av[0], ti->begin, ti->len,
+				  dm_table_get_mode(ti->table), &path->dev))
+			goto bad_dm_get_device;
+
+		path_c = mc->ps.type->add_path(&mc->ps, path,
+					       ps_parms, &av[3], &ti->error);
+		if (!path_c)
+			goto bad_ps_add;
+
+		path->ps_private = path_c;
+		path->mc = mc;
+		_reset_failures(path);
+	}
+
+	ti->private = mc;
+	ti->error = NULL;
+	mc->ti = ti;
+	push(&_mc_jobs, &mc->list, &_job_lock);
+
+	return 0;
+
+bad_parms:
+	ti->error = "dm-multipath: invalid number of arguments";
+	return -EINVAL;
+
+bad_paths:
+	ti->error = "dm-multipath: not enough paths";
+	return -EINVAL;
+
+bad_context:
+	ti->error = "dm-multipath: can't allocate multipath context";
+	return -ENOMEM;
+
+bad_ps:
+	_free_context(mc);
+	ti->error = "dm-multipath: invalid path selector";
+	return -EINVAL;
+
+bad_ps_ctr:
+	_free_context(mc);
+	ti->error = "dm-multipath: path selector constructor failed";
+	return -ENXIO;
+
+bad_alloc_path:
+	_free_context(mc);
+	ti->error = "dm-multipath: can't allocate path context";
+	return -ENOMEM;
+
+bad_dm_get_device:
+	_free_context(mc);
+	ti->error = "dm-multipath: error getting device";
+	return -ENXIO;
+
+bad_ps_add:
+	_free_context(mc);
+	ti->error = "dm-multipath: error adding path";
+	return -ENXIO;
+}
+#undef xx
+
+static void _wait_for_scrub_ios(struct multipath_c *mc)
+{
+	struct path *path;
+
+	list_for_each_entry(path, &mc->paths, list) {
+		while (test_bit(SCRUB_IO, &path->flags)) {
+			/* Must set the task state, or schedule_timeout()
+			   returns immediately */
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(HZ / 2);
+		}
+	}
+}
+
+static inline void _remove_mc_job(struct multipath_c *mc)
+{
+	unsigned long flags;
+	struct multipath_c *mc_tmp;
+
+	spin_lock_irqsave(&_job_lock, flags);
+	list_for_each_entry(mc_tmp, &_mc_jobs, list) {
+		if (mc == mc_tmp) {
+			list_del(&mc->list);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&_job_lock, flags);
+}
+
+/* Destruct a multipath mapping */
+static void multipath_dtr(struct dm_target *ti)
+{
+	struct multipath_c *mc = (struct multipath_c *) ti->private;
+
+	_wait_for_scrub_ios(mc);
+	_remove_mc_job(mc);
+	_free_context(mc);
+}
+
+static inline void _map(struct multipath_io *io, struct path *path)
+{
+	io->path = path;
+	io->bh->b_rdev = path->dev->dev;
+	_set_io_jiffies(path);
+}
+
+static int multipath_end_io(struct dm_target *ti, struct buffer_head *bh,
+			    int rw, int error, union map_info *map_context)
+{
+	int r = 0;
+	struct multipath_io *io = (struct multipath_io *) map_context->ptr;
+	struct multipath_c *mc = (struct multipath_c *) io->mc;
+	struct path_selector *ps = &mc->ps;
+	struct path *path = io->path;
+	struct path_selector_type *pst = ps->type;
+	ps_endio_fn ps_endio = pst->endio;
+
+	if (error) {
+		if (atomic_dec_and_test(&path->fail))
+			_set_failed(io);
+
+		path = pst->select_path(ps, io->bh, io->rw, &io->path_context);
+		if (path) {
+			/* Map the IO to this new path */
+			_map(io, path);
+			push(&mc->io_jobs, &io->list, &mc->lock);
+			wake_multipathd();
+
+			return 1;	/* Handle later */
+		}
+	}
+
+	/* Call path selector end IO method if registered */
+	if (ps_endio)
+		ps_endio(ps, io->bh, io->rw, error, &io->path_context);
+
+	free_io(io);
+
+	return r;
+}
+
+/* Suspend */
+static void multipath_suspend(struct dm_target *ti)
+{
+	struct multipath_c *mc = (struct multipath_c *) ti->private;
+
+	atomic_set(&mc->suspended, 1);
+	_wait_for_scrub_ios(mc);
+}
+
+/* Resume */
+static void multipath_resume(struct dm_target *ti)
+{
+	struct multipath_c *mc = (struct multipath_c *) ti->private;
+
+	atomic_set(&mc->suspended, 0);
+	wake_multipathd();
+}
+
+/* Multipath mapping */
+static int multipath_map(struct dm_target *ti, struct buffer_head *bh,
+			 int rw, union map_info *map_context)
+{
+	struct multipath_c *mc = (struct multipath_c *) ti->private;
+	struct path_selector *ps = &mc->ps;
+	struct multipath_io *io = alloc_io();
+	struct path *path;
+
+	/* Ask path selector for a path */
+	path = ps->type->select_path(ps, bh, rw, &io->path_context);
+	if (!path) {		/* No valid path found */
+		free_io(io);
+		return -1;	/* Error */
+	}
+
+	io->mc = mc;
+	io->bh = bh;
+	io->rw = rw;
+
+	_map(io, path);			/* Map the IO to this path */
+	map_context->ptr = (void *) io;	/* Save for multipath_end_io() */
+
+	return 1;	/* Normal map */
+}
+
+/* Multipath status */
+static int multipath_status(struct dm_target *ti, status_type_t type,
+			    char *result, unsigned int maxlen)
+{
+	int sz = 0;
+	struct multipath_c *mc = (struct multipath_c *) ti->private;
+	struct path *path;
+	ps_status_fn ps_status = mc->ps.type->status;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		list_for_each_entry(path, &mc->paths, list) {
+			sz += snprintf(result + sz, maxlen - sz, "%s ",
+				       dm_kdevname(to_kdev_t(path->dev->dev)));
+			if (_is_failed(path)) {
+				struct timespec fail;
+
+				jiffies_to_timespec(jiffies - path->io_jiffies,
+						    &fail);
+				sz += snprintf(result + sz, maxlen - sz,
+					       "I(%lu/" ARG_FORMAT ") ",
+					       (unsigned long) fail.tv_sec,
+					       path->reactivation_interval);
+			} else {
+				sz += snprintf(result + sz, maxlen - sz, "O");
+
+				if (atomic_read(&path->fail_total))
+					sz += snprintf(result + sz, maxlen - sz,
+						       "[" ARG_FORMAT "]",
+						       atomic_read(&path->fail_total));
+
+				sz += snprintf(result + sz, maxlen - sz, " ");
+			}
+
+			if (ps_status) {
+				ps_status(path->ps_private, type,
+					  result + sz, maxlen - sz);
+				/* only count what was just appended */
+				sz += strlen(result + sz);
+			}
+		}
+		break;
+
+	case STATUSTYPE_TABLE:
+		list_for_each_entry(path, &mc->paths, list) {
+			sz += snprintf(result + sz, maxlen - sz,
+				       "%s " ARG_FORMAT " " ARG_FORMAT " ",
+				       dm_kdevname(to_kdev_t(path->dev->dev)),
+				       path->reactivation_interval,
+				       path->fail_max);
+
+			if (ps_status) {
+				ps_status(path->ps_private, type,
+					  result + sz, maxlen - sz);
+				sz += strlen(result + sz);
+			}
+
+			if (sz >= maxlen)
+				break;
+		}
+		break;
+
+	}
+
+	return 0;
+}
+
+static struct target_type multipath_target = {
+	.name = "multipath",
+	.module = THIS_MODULE,
+	.ctr = multipath_ctr,
+	.dtr = multipath_dtr,
+	.map = multipath_map,
+	.end_io = multipath_end_io,
+	.suspend = multipath_suspend,
+	.resume = multipath_resume,
+	.status = multipath_status,
+};
+
+int __init dm_multipath_init(void)
+{
+	int r = -EINVAL;
+
+	if (!ios)
+		ios = DEFAULT_IOS;
+	else if (ios < MIN_IOS || ios > MAX_IOS)
+		goto bad;
+
+	r = -ENOMEM;
+
+	/* Create multipath io slab */
+	_multipath_cache = kmem_cache_create("dm multipath io",
+					     sizeof(struct multipath_io),
+					     0, 0, NULL, NULL);
+	if (!_multipath_cache)
+		goto bad;
+
+	/* Create multipath io mempool */
+	_multipath_pool = mempool_create(ios, mempool_alloc_slab,
+					 mempool_free_slab,
+					 _multipath_cache);
+	if (!_multipath_pool)
+		goto bad_pool;
+
+	r = dm_register_target(&multipath_target);
+	if (r < 0) {
+		DMERR("%s: register failed %d", multipath_target.name, r);
+		goto bad_target;
+	}
+
+	r = dm_register_path_selectors();
+	if (r && r != -EEXIST)
+		goto bad_ps;
+
+	r = start_daemon();
+	if (!r) {
+		DMINFO("dm_multipath v0.2.0 (%d io contexts preallocated)",
+		       ios);
+		return 0;
+	}
+
+	/* start_daemon() failed: drop the path selectors again */
+	dm_unregister_path_selectors();
+
+bad_ps:
+	dm_unregister_target(&multipath_target);
+
+bad_target:
+	mempool_destroy(_multipath_pool);
+
+bad_pool:
+	kmem_cache_destroy(_multipath_cache);
+
+bad:
+	return r;
+}
+
+void __exit dm_multipath_exit(void)
+{
+	int r;
+
+	stop_daemon();
+	dm_unregister_path_selectors();
+	r = dm_unregister_target(&multipath_target);
+	if (r < 0)
+		DMERR("%s: target unregister failed %d",
+		      multipath_target.name, r);
+
+	mempool_destroy(_multipath_pool);
+	kmem_cache_destroy(_multipath_cache);
+}
+
+/* Module hooks */
+module_init(dm_multipath_init);
+module_exit(dm_multipath_exit);
+
+MODULE_DESCRIPTION(DM_NAME " multipath target");
+MODULE_AUTHOR("Heinz Mauelshagen ");
+MODULE_LICENSE("GPL");
+MODULE_PARM(ios, "i");
+MODULE_PARM_DESC(ios, "number of preallocated io contexts");

--- diff/drivers/md/dm-null-ps.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-null-ps.c	2003-11-26 10:20:34.000000000 +0000
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2003 Sistina Software.
+ *
+ * Module Author: Heinz Mauelshagen
+ *
+ * This file is released under the GPL.
+ *
+ *
+ * "Null" Path Selector
+ *
+ * Returns any path unless failed.
+ *
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+#include <linux/slab.h>
+
+/* Path selector context */
+struct null_c {
+	struct list_head paths;		/* List of operational paths */
+	struct list_head failed_paths;	/* List of failed paths */
+
+	spinlock_t lock;
+};
+
+/* Path info */
+struct path_c {
+	struct list_head list;	/* Linked list to null_c */
+
+	struct path *path;	/* Opaque pointer to caller path info */
+	struct null_c *nc;	/* Back pointer to path selector context */
+};
+
+/* Allocate null context */
+static struct null_c *_alloc_null_c(void)
+{
+	struct null_c *nc = kmalloc(sizeof(*nc), GFP_KERNEL);
+
+	if (nc) {
+		INIT_LIST_HEAD(&nc->paths);
+		INIT_LIST_HEAD(&nc->failed_paths);
+		nc->lock = SPIN_LOCK_UNLOCKED;
+	}
+
+	return nc;
+}
+
+/* Allocate path context */
+static struct path_c *_alloc_path_c(void)
+{
+	struct path_c *pc = kmalloc(sizeof(*pc), GFP_KERNEL);
+
+	if (pc)
+		memset(pc, 0, sizeof(*pc));
+
+	return pc;
+}
+
+
+/* Path selector constructor */
+static int null_ctr(struct path_selector *ps,
+		    int argc, char **argv, char **error)
+{
+	struct null_c *nc;
+
+	if (argc) {
+		*error = "null path selector: No arguments allowed";
+		return -EINVAL;
+	}
+
+	nc = _alloc_null_c();
+	if (!nc) {
+		*error = "null path selector: Error allocating context";
+		return -ENOMEM;
+	}
+
+	ps->context = (void *) nc;
+
+	return 0;
+}
+
+/* Path selector destructor */
+static void null_dtr(struct path_selector *ps)
+{
+	struct null_c *nc = (struct null_c *) ps->context;
+	struct list_head *lists[] = {
+		&nc->paths,
+		&nc->failed_paths,
+	};
+	int i = ARRAY_SIZE(lists);
+
+	spin_lock(&nc->lock);
+	while (i--) {
+		struct list_head *elem, *tmp;
+
+		list_for_each_safe(elem, tmp, lists[i]) {
+			struct path_c *path =
+				list_entry(elem, struct path_c, list);
+
+			list_del(elem);
+			kfree(path);
+		}
+	}
+	spin_unlock(&nc->lock);
+
+	kfree(nc);
+	ps->context = NULL;
+}
+
+/* Path add context */
+static void *null_add_path(struct path_selector *ps, struct path *path,
+			   int argc, char **argv, char **error)
+{
+	struct null_c *nc = (struct null_c *) ps->context;
+	struct path_c *pc;
+
+	if (argc) {
+		*error = "null path selector: No path arguments allowed";
+		return NULL;
+	}
+
+	pc = _alloc_path_c();
+	if (!pc) {
+		*error = "null path selector: Error allocating path context";
+		return NULL;
+	}
+
+	pc->path = path;
+	pc->nc = nc;
+	spin_lock(&nc->lock);
+	list_add_tail(&pc->list, &nc->paths);
+	spin_unlock(&nc->lock);
+
+	return (void *) pc;
+}
+
+/* Path set state (state = 0 : operational; state != 0 : failed) */
+static void null_set_path_state(void *ps_private, unsigned long state)
+{
+	unsigned long flags;
+	struct path_c *path = (struct path_c *) ps_private;
+	struct null_c *nc = path->nc;
+
+	spin_lock_irqsave(&nc->lock, flags);
+	list_move_tail(&path->list,
+		       state ? &nc->failed_paths : &nc->paths);
+	spin_unlock_irqrestore(&nc->lock, flags);
+}
+
+/* Path selector */
+static struct path *null_select_path(struct path_selector *ps,
+				     struct buffer_head *bh, int rw,
+				     struct path_info *path_context)
+{
+	unsigned long flags;
+	struct null_c *nc = (struct null_c *) ps->context;
+	struct list_head *list = &nc->paths;
+	struct path_c *path = NULL;
+
+	spin_lock_irqsave(&nc->lock, flags);
+	if (!list_empty(list))
+		path = list_entry(list->next, struct path_c, list);
+	spin_unlock_irqrestore(&nc->lock, flags);
+
+	/* Return opaque pointer to caller path object or NULL */
+	return path ? path->path : NULL;
+}
+
+static struct path_selector_type null_ps = {
+	.name = "null",
+	.ctr = null_ctr,
+	.dtr = null_dtr,
+	.add_path = null_add_path,
+	.set_path_state = null_set_path_state,
+	.select_path = null_select_path,
+	.endio = NULL,
+	.status = NULL,
+};
+
+int dm_register_null_ps(void) {
+	return dm_register_path_selector(&null_ps);
+}
+
+void dm_unregister_null_ps(void) {
+	dm_unregister_path_selector(&null_ps);
+}

--- diff/drivers/md/dm-path-selector.c	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-path-selector.c	2003-11-26 10:20:34.000000000 +0000
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2003 Sistina Software.
+ *
+ * Module Author: Heinz Mauelshagen
+ *
+ * This file is released under the GPL.
+ *
+ * Path selector housekeeping (register/unregister/...)
+ *
+ */
+
+#include "dm.h"
+#include "dm-path-selector.h"
+
+#include <linux/slab.h>
+
+struct ps_internal {
+	struct path_selector_type pt;	/* Must stay the first member;
+					   the casts below depend on it */
+
+	struct list_head list;
+	long use;
+};
+
+static LIST_HEAD(_path_selectors);
+static DECLARE_MUTEX(_lock);
+
+static struct path_selector_type *__find_path_selector_type(const char *name)
+{
+	struct ps_internal *li;
+
+	list_for_each_entry(li, &_path_selectors, list) {
+		if (!strcmp(name, li->pt.name))
+			return &li->pt;
+	}
+
+	return NULL;
+}
+
+struct path_selector_type *dm_get_path_selector(const char *name)
+{
+	struct path_selector_type *lb;
+
+	down(&_lock);
+	lb = __find_path_selector_type(name);
+	if (lb) {
+		struct ps_internal *li = (struct ps_internal *) lb;
+		li->use++;
+	}
+	up(&_lock);
+
+	return lb;
+}
+
+void dm_put_path_selector(struct path_selector_type *l)
+{
+	struct ps_internal *li = (struct ps_internal *) l;
+
+	down(&_lock);
+	if (--li->use < 0)
+		BUG();
+	up(&_lock);
+
+	return;
+}
+
+static struct ps_internal *_alloc_path_selector(struct path_selector_type *pt)
+{
+	struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL);
+
+	if (psi) {
+		memset(psi, 0, sizeof(*psi));
+		psi->pt = *pt;
+	}
+
+	return psi;
+}
+
+int dm_register_path_selector(struct path_selector_type *pst)
+{
+	int r = 0;
+	struct ps_internal *psi = _alloc_path_selector(pst);
+
+	if (!psi)
+		return -ENOMEM;
+
+	down(&_lock);
+	if (__find_path_selector_type(pst->name)) {
+		kfree(psi);
+		r = -EEXIST;
+	} else
+		list_add(&psi->list, &_path_selectors);
+
+	up(&_lock);
+
+	return r;
+}
+
+int dm_unregister_path_selector(struct path_selector_type *pst)
+{
+	struct ps_internal *psi;
+
+	down(&_lock);
+	psi = (struct ps_internal *) __find_path_selector_type(pst->name);
+	if (!psi) {
+		up(&_lock);
+		return -EINVAL;
+	}
+
+	if (psi->use) {
+		up(&_lock);
+		return -ETXTBSY;
+	}
+
+	list_del(&psi->list);
+	up(&_lock);
+
+	kfree(psi);
+
+	return 0;
+}
+
+/*
+ * (Un)register all path selectors (FIXME: remove this after tests)
+ */
+void dm_unregister_path_selectors(void)
+{
+	dm_unregister_null_ps();
+	dm_unregister_latency_ps();
+}
+
+int dm_register_path_selectors(void)
+{
+	int r;
+
+	r = dm_register_null_ps();
+	if (!r || r == -EEXIST)
+		dm_register_latency_ps();
+
+	return r;
+}

--- diff/drivers/md/dm-path-selector.h	1970-01-01 01:00:00.000000000 +0100
+++ source/drivers/md/dm-path-selector.h	2003-11-26 10:20:34.000000000 +0000
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2003 Sistina Software.
+ *
+ * Module Author: Heinz Mauelshagen
+ *
+ * This file is released under the GPL.
+ *
+ * Path-Selector interface/registration/unregistration definitions
+ *
+ */
+
+#ifndef DM_PATH_SELECTOR_H
+#define DM_PATH_SELECTOR_H
+
+struct path;
+struct buffer_head;
+
+struct path_info {
+	void *ptr;
+	unsigned long long ll;
+};
+
+struct path_selector_type;
+struct path_selector {
+	struct path_selector_type *type;
+	void *context;
+};
+
+/*
+ * Constructs a path selector object, takes custom arguments
+ */
+typedef int (*ps_ctr_fn) (struct path_selector *ps,
+			  int argc, char **argv,
+			  char **error);
+typedef void (*ps_dtr_fn) (struct path_selector *ps);
+
+/*
+ * Add an opaque path object, along with some selector specific
+ * path args (eg, path priority).
+ */
+typedef void * (*ps_add_path_fn) (struct path_selector *ps,
+				  struct path *path,
+				  int argc, char **argv, char **error);
+
+/*
+ * Chooses a path for this io, if no paths are available then
+ * NULL will be returned. Can hand path_info over to the
+ * ps_endio_fn below.
+ *
+ * Must ensure that _any_ dynamically allocated selection context is
+ * reused or reallocated because an endio call (which needs to free it)
+ * might happen after a couple of select calls.
+ */
+typedef struct path * (*ps_select_path_fn) (struct path_selector *ps,
+					    struct buffer_head *bh, int rw,
+					    struct path_info *path_context);
+
+/*
+ * Hook the end of the io, path throughput/failure can be
+ * detected through this. Must ensure that any dynamically allocated
+ * IO context gets freed.
+ */
+typedef void (*ps_endio_fn) (struct path_selector *ps,
+			     struct buffer_head *bh, int rw, int error,
+			     struct path_info *path_context);
+
+/*
+ * Set path state (eg, failed/operational)
+ */
+typedef void (*ps_set_path_state_fn) (void *context,
+				      unsigned long state);
+
+/*
+ * Table content based on parameters added in ps_add_path_fn
+ * or path selector status
+ */
+typedef int (*ps_status_fn) (void *context,
+			     status_type_t type,
+			     char *result, unsigned int maxlen);
+
+/* Information about a path selector type */
+struct path_selector_type {
+	char *name;
+	ps_ctr_fn ctr;
+	ps_dtr_fn dtr;
+
+	ps_add_path_fn add_path;
+	ps_set_path_state_fn set_path_state;
+
+	ps_select_path_fn select_path;
+	ps_endio_fn endio;
+
+	ps_status_fn status;
+};
+
+/* Register a path selector */
+int dm_register_path_selector(struct path_selector_type *type);
+
+/* Unregister a path selector */
+int dm_unregister_path_selector(struct path_selector_type *type);
+
+/* Returns a registered path selector type */
+struct path_selector_type *dm_get_path_selector(const char *name);
+
+/* Releases a path selector */
+void dm_put_path_selector(struct path_selector_type *pst);
+
+/* FIXME: remove these 6 after tests */
+int dm_register_path_selectors(void);
+void dm_unregister_path_selectors(void);
+int dm_register_null_ps(void);
+void dm_unregister_null_ps(void);
+int dm_register_latency_ps(void);
+void dm_unregister_latency_ps(void);
+
+#endif
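
For testing, here is a sketch of the table lines multipath_ctr() accepts. The
device names, sector counts and intervals are made-up examples, and the
per-path accounting assumes the corrected parms = 1 + path_parms + ps_parms
above:

    # <num_paths> <num_path_parms> <scrub_interval> <selector> <num_ps_parms>
    # then per path: <device> <reactivation_interval> <max_failures>
    # followed by <num_ps_parms> selector arguments.

    # Two paths, 60s scrub interval, null selector (no per-path selector args):
    echo "0 2097152 multipath 2 2 60 null 0 /dev/sdb 10 3 /dev/sdc 10 3" \
        | dmsetup create mp0

    # Same, latency selector taking <priority> <queue_min> per path:
    echo "0 2097152 multipath 2 2 60 latency 2 /dev/sdb 10 3 0 100 /dev/sdc 10 3 0 100" \
        | dmsetup create mp1

With the latency selector, each path serves queue_min IOs before the selector
compares accumulated latencies, and a path only leaves its priority group when
every path in that group has failed.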