Index: linux-2.6.16-rc5/drivers/md/dm-mpath.c
===================================================================
--- linux-2.6.16-rc5.orig/drivers/md/dm-mpath.c	2006-03-12 21:59:27.000000000 +0000
+++ linux-2.6.16-rc5/drivers/md/dm-mpath.c	2006-03-13 11:48:25.000000000 +0000
@@ -52,6 +52,13 @@ struct priority_group {
 	struct list_head pgpaths;
 };
 
+/* State of the per-multipath priority-group switch timeout */
+enum pg_timeout_state {
+	PG_TIMEOUT_NONE = 0,		/* timer not armed */
+	PG_TIMEOUT_IN_PROGRESS,		/* timer armed, I/O is being queued */
+	PG_TIMEOUT_FAILED		/* timer expired before a path was chosen */
+};
+
 /* Multipath context */
 struct multipath {
 	struct list_head list;
@@ -75,6 +82,10 @@ struct multipath {
 	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
 	unsigned saved_queue_if_no_path;/* Saved state during suspension */
 
+	unsigned pg_timeout;		/* seconds to wait before switching PGs */
+	enum pg_timeout_state pg_timeout_status;
+	struct timer_list pg_timer;
+
 	struct work_struct process_queued_ios;
 	struct bio_list queued_ios;
 	unsigned queue_size;
@@ -105,6 +116,7 @@ static kmem_cache_t *_mpio_cache;
 struct workqueue_struct *kmultipathd;
 static void process_queued_ios(void *data);
 static void trigger_event(void *data);
+static void pg_timeout_event(unsigned long data);
 
 
 /*-----------------------------------------------
@@ -177,6 +189,9 @@ static struct multipath *alloc_multipath
 		INIT_LIST_HEAD(&m->priority_groups);
 		spin_lock_init(&m->lock);
 		m->queue_io = 1;
+		init_timer(&m->pg_timer);
+		m->pg_timer.function = pg_timeout_event;
+		m->pg_timer.data = (unsigned long)m;
 		INIT_WORK(&m->process_queued_ios, process_queued_ios, m);
 		INIT_WORK(&m->trigger_event, trigger_event, m);
 		m->mpio_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
@@ -214,6 +229,35 @@ static void free_multipath(struct multip
  * Path selection
  *-----------------------------------------------*/
 
+/*
+ * Timer callback for pg_timer, which __choose_pgpath() arms when the
+ * current priority group has no usable path and pg_timeout is set.
+ * Marks the timeout as expired and, unless a pg_init is pending or
+ * running, stops queueing and kicks the daemon to process queued I/O.
+ *
+ * NOTE(review): no hunk in this patch stops pg_timer when the multipath
+ * is freed - confirm that teardown calls del_timer_sync().
+ */
+static void pg_timeout_event(unsigned long data)
+{
+	unsigned long flags;
+	struct multipath *m = (struct multipath *)data;
+
+	spin_lock_irqsave(&m->lock, flags);
+	/* Nothing to do if the timeout was cancelled before we got the lock */
+	if (m->pg_timeout_status == PG_TIMEOUT_NONE)
+		goto out;
+	m->pg_timeout_status = PG_TIMEOUT_FAILED;
+	if (!m->pg_init_required && !m->pg_init_in_progress) {
+		m->queue_io = 0;
+		if (m->queue_size)
+			queue_work(kmultipathd, &m->process_queued_ios);
+	}
+
+out:
+	spin_unlock_irqrestore(&m->lock, flags);
+}
+
 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 {
 	struct hw_handler *hwh = &m->hw_handler;
@@ -259,12 +303,29 @@ static void __choose_pgpath(struct multi
 		pg = m->next_pg;
 		m->next_pg = NULL;
 		if (!__choose_path_in_pg(m, pg))
-			return;
+			goto out;
 	}
 
 	/* Don't change PG until it has no remaining paths */
-	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
-		return;
+	if (m->current_pg) {
+		if (!__choose_path_in_pg(m, m->current_pg))
+			goto out;
+		if (m->pg_timeout) {
+			/* Still waiting for the timer: keep queueing */
+			if (m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS)
+				return;
+			/* First failure: queue I/O and arm the timer */
+			if (m->pg_timeout_status == PG_TIMEOUT_NONE) {
+				m->queue_io = 1;
+				m->current_pgpath = NULL;
+				m->pg_timeout_status = PG_TIMEOUT_IN_PROGRESS;
+				mod_timer(&m->pg_timer,
+					  jiffies + m->pg_timeout * HZ);
+				return;
+			}
+			/* PG_TIMEOUT_FAILED: fall through and try other PGs */
+		}
+	}
 
 	/*
 	 * Loop through priority groups until we find a valid path.
@@ -276,13 +337,21 @@ static void __choose_pgpath(struct multi
 			if (pg->bypassed == bypassed)
 				continue;
 			if (!__choose_path_in_pg(m, pg))
-				return;
+				goto out;
 		}
 	} while (bypassed--);
 
 failed:
 	m->current_pgpath = NULL;
 	m->current_pg = NULL;
+	return;
+
+out:
+	/* A path was chosen: cancel any PG timeout that is still pending */
+	if (m->pg_timeout_status != PG_TIMEOUT_NONE) {
+		del_timer(&m->pg_timer);
+		m->pg_timeout_status = PG_TIMEOUT_NONE;
+	}
 }
 
 static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
@@ -305,6 +374,7 @@ static int map_io(struct multipath *m, s
 		m->queue_size--;
 
 	if ((pgpath && m->queue_io) ||
+	    (m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS) ||
 	    (!pgpath && m->queue_if_no_path)) {
 		/* Queue for the daemon to resubmit */
 		bio_list_add(&m->queued_ios, bio);
@@ -401,7 +471,8 @@ static void process_queued_ios(void *dat
 	pgpath = m->current_pgpath;
 
 	if ((pgpath && !m->queue_io) ||
-	    (!pgpath && !m->queue_if_no_path))
+	    (!pgpath && !m->queue_if_no_path &&
+	     (m->pg_timeout_status != PG_TIMEOUT_IN_PROGRESS)))
 		must_queue = 0;
 
 	if (m->pg_init_required && !m->pg_init_in_progress) {
@@ -855,7 +926,9 @@ static int reinstate_path(struct pgpath
 	pgpath->path.is_active = 1;
 
 	m->current_pgpath = NULL;
-	if (!m->nr_valid_paths++ && m->queue_size)
+	if ((!m->nr_valid_paths++ ||
+	     (m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS)) &&
+	    m->queue_size)
 		queue_work(kmultipathd, &m->process_queued_ios);
 
 	queue_work(kmultipathd, &m->trigger_event);
@@ -930,6 +1003,8 @@ static int switch_pg_num(struct multipat
 		m->current_pg = NULL;
 		m->next_pg = pg;
 	}
+	if ((m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS) && m->queue_size)
+		queue_work(kmultipathd, &m->process_queued_ios);
 	spin_unlock_irqrestore(&m->lock, flags);
 
 	queue_work(kmultipathd, &m->trigger_event);
@@ -960,6 +1035,52 @@ static int bypass_pg_num(struct multipat
 	return 0;
 }
 
+/*
+ * Handle the "set_pg_timeout <seconds>" message.  The value is how long
+ * I/O is held once the current priority group has no usable path before
+ * the other groups are tried; 0 disables the timeout and cancels one
+ * that is in progress.
+ *
+ * Returns 0 on success, or -EINVAL if the argument is missing, is not a
+ * number, or is too large to be converted to jiffies.
+ */
+static int set_pg_timeout(struct multipath *m, const char *timeoutstr)
+{
+	unsigned timeout;
+	unsigned long flags;
+
+	if (!timeoutstr || (sscanf(timeoutstr, "%u", &timeout) != 1)) {
+		DMWARN("invalid timeout number supplied to set_pg_timeout");
+		return -EINVAL;
+	}
+
+	/* __choose_pgpath() computes pg_timeout * HZ: refuse values that wrap */
+	if (timeout > INT_MAX / HZ) {
+		DMWARN("timeout supplied to set_pg_timeout is too large");
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&m->lock, flags);
+	m->pg_timeout = timeout;
+
+	if (!timeout) {
+		/* Timeout disabled: cancel a running timer and resume I/O */
+		enum pg_timeout_state status = m->pg_timeout_status;
+		m->pg_timeout_status = PG_TIMEOUT_NONE;
+		if (status == PG_TIMEOUT_IN_PROGRESS) {
+			del_timer(&m->pg_timer);
+			if (!m->pg_init_required && !m->pg_init_in_progress) {
+				m->queue_io = 0;
+				if (m->queue_size)
+					queue_work(kmultipathd,
+						   &m->process_queued_ios);
+			}
+		}
+	}
+	spin_unlock_irqrestore(&m->lock, flags);
+	return 0;
+}
+
 /*
  * pg_init must call this when it has completed its initialisation
  */
@@ -984,7 +1105,8 @@ void dm_pg_init_complete(struct path *pa
 	if (err_flags) {
 		m->current_pgpath = NULL;
 		m->current_pg = NULL;
-	} else if (!m->pg_init_required)
+	} else if (!m->pg_init_required &&
+		   (m->pg_timeout_status != PG_TIMEOUT_IN_PROGRESS))
 		m->queue_io = 0;
 
 	m->pg_init_in_progress = 0;
@@ -1241,6 +1363,8 @@ static int multipath_message(struct dm_t
 		return bypass_pg_num(m, argv[1], 0);
 	else if (!strnicmp(argv[0], MESG_STR("switch_group")))
 		return switch_pg_num(m, argv[1]);
+	else if (!strnicmp(argv[0], MESG_STR("set_pg_timeout")))
+		return set_pg_timeout(m, argv[1]);
 	else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
 		action = reinstate_path;
 	else if (!strnicmp(argv[0], MESG_STR("fail_path")))