Add endio error handler.

Let a hardware handler decide how dm-mpath reacts to a failed bio.  A new
optional hwh_err_fn hook returns a mask of MP_FAIL_PATH, MP_BYPASS_PG and
MP_RETRY_IO, and do_end_io() acts on that mask.  dm_hw_handler_err() is
exported as a default implementation of the hook.  The string-parsing
bypass_pg() is renamed to bypass_pg_num() so that bypass_pg() can take a
priority_group directly.  Without an err hook the previous behaviour is
kept: the path is failed and the bio is retried.  The disable_group and
enable_group messages now pass bypass=1 and bypass=0 respectively.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation and the wrapping of context lines are reconstructed from the
hunk line counts - verify against the target tree before applying.

--- diff/drivers/md/dm-hw-handler.c	2004-10-29 15:37:46.000000000 +0100
+++ source/drivers/md/dm-hw-handler.c	2004-10-29 15:38:04.000000000 +0100
@@ -27,7 +27,7 @@
 {
 	struct hwh_internal *hwhi;
 
-	list_for_each_entry (hwhi, &_hw_handlers, list) {
+	list_for_each_entry(hwhi, &_hw_handlers, list) {
 		if (!strcmp(name, hwhi->hwht.name))
 			return hwhi;
 	}
@@ -86,7 +86,7 @@
 	if (hwhi->use < 0)
 		BUG();
 
-out:
+ out:
 	up_read(&_hwh_lock);
 }
 
@@ -149,5 +149,73 @@
 	return 0;
 }
 
+/*
+ * Default hwh_err_fn: pick the MP_* flags used to handle a failed bio.
+ * Sense-data decoding is compiled out below, so for now every error
+ * fails the path and retries the I/O.
+ */
+unsigned dm_hw_handler_err(struct hw_handler *hwh, struct bio *bio)
+{
+#if 0
+	int sense_key, asc, ascq;
+
+	if (bio->bi_error & BIO_SENSE) {
+		/* FIXME: This is just an initial guess. */
+		/* key / asc / ascq */
+		sense_key = (bio->bi_error >> 16) & 0xff;
+		asc = (bio->bi_error >> 8) & 0xff;
+		ascq = bio->bi_error & 0xff;
+
+		switch (sense_key) {
+			/* This block as a whole comes from the device.
+			 * So no point retrying on another path. */
+		case 0x03:	/* Medium error */
+		case 0x05:	/* Illegal request */
+		case 0x07:	/* Data protect */
+		case 0x08:	/* Blank check */
+		case 0x0a:	/* copy aborted */
+		case 0x0c:	/* obsolete - no clue ;-) */
+		case 0x0d:	/* volume overflow */
+		case 0x0e:	/* data miscompare */
+		case 0x0f:	/* reserved - no idea either. */
+			return 0;
+
+			/* For these errors it's unclear whether they
+			 * come from the device or the controller.
+			 * So let's just try a different path, and if
+			 * it eventually succeeds, user-space will clear
+			 * the paths again... */
+		case 0x02:	/* Not ready */
+		case 0x04:	/* Hardware error */
+		case 0x09:	/* vendor specific */
+		case 0x0b:	/* Aborted command */
+			return MP_FAIL_PATH | MP_RETRY_IO;
+
+		case 0x06:	/* Unit attention - might want to decode */
+			if (asc == 0x04 && ascq == 0x01)
+				/* "Unit in the process of
+				 * becoming ready" */
+				return MP_RETRY_IO;
+			return MP_FAIL_PATH | MP_RETRY_IO;
+
+			/* FIXME: For Unit Not Ready we may want
+			 * to have a generic pg activation
+			 * feature (START_UNIT). */
+
+			/* Should these two ever end up in the
+			 * error path? I don't think so. */
+		case 0x00:	/* No sense */
+		case 0x01:	/* Recovered error */
+			return MP_RETRY_IO;
+		}
+	}
+#endif
+
+	/* We have no idea how to decode the other kinds of errors ->
+	 * assume generic error condition. */
+	return MP_FAIL_PATH | MP_RETRY_IO;
+}
+
 EXPORT_SYMBOL(dm_register_hw_handler);
 EXPORT_SYMBOL(dm_unregister_hw_handler);
+EXPORT_SYMBOL(dm_hw_handler_err);
--- diff/drivers/md/dm-hw-handler.h	2004-10-29 15:37:46.000000000 +0100
+++ source/drivers/md/dm-hw-handler.h	2004-10-29 15:38:04.000000000 +0100
@@ -23,6 +23,7 @@
 typedef int (*hwh_ctr_fn) (struct hw_handler *hwh,
 			   unsigned arc, char **argv);
 typedef void (*hwh_dtr_fn) (struct hw_handler *hwh);
+typedef unsigned (*hwh_err_fn) (struct hw_handler *hwh, struct bio *bio);
 typedef int (*hwh_status_fn) (struct hw_handler *hwh,
 			      status_type_t type,
 			      char *result, unsigned int maxlen);
@@ -35,6 +36,7 @@
 
 	hwh_ctr_fn ctr;
 	hwh_dtr_fn dtr;
+	hwh_err_fn err;
 	hwh_status_fn status;
 };
 
@@ -50,4 +52,12 @@
 /* Releases a hardware handler */
 void dm_put_hw_handler(struct hw_handler_type *hwht);
 
+/* Default hwh_err_fn */
+unsigned dm_hw_handler_err(struct hw_handler *hwh, struct bio *bio);
+
+/* hwh_err_fn return flags */
+#define MP_FAIL_PATH 1	/* fail the path the bio was sent down */
+#define MP_BYPASS_PG 2	/* set the bypass flag on that path's PG */
+#define MP_RETRY_IO 4	/* remap and retry the bio; if clear, -EIO */
+
 #endif
--- diff/drivers/md/dm-mpath.c	2004-10-29 15:37:58.000000000 +0100
+++ source/drivers/md/dm-mpath.c	2004-10-29 15:38:04.000000000 +0100
@@ -634,15 +634,28 @@
 	return r;
 }
 
+/*
+ * Set/clear the bypass flag of a PG and drop the cached current path.
+ */
+static void bypass_pg(struct multipath *m, struct priority_group *pg,
+		      int bypass)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&m->lock, flags);
+	pg->bypass = bypass;
+	m->current_path = NULL;
+	spin_unlock_irqrestore(&m->lock, flags);
+}
+
 /*
  * Set/clear bypass status of a PG.
  * PG numbering goes 1, 2, 3...
  */
-static int bypass_pg(struct multipath *m, const char *pgstr, int bypass)
+static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypass)
 {
 	struct priority_group *pg;
 	unsigned pgnum;
-	unsigned long flags;
 
 	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum)
 		goto error;
@@ -651,10 +664,7 @@
 		if (--pgnum)
 			continue;
 
-		spin_lock_irqsave(&m->lock, flags);
-		pg->bypass = bypass;
-		m->current_path = NULL;
-		spin_unlock_irqrestore(&m->lock, flags);
+		bypass_pg(m, pg, bypass);
 
 		return 0;
 	}
@@ -668,6 +678,8 @@
 		     int error, struct mpath_io *mpio)
 {
 	int r;
+	struct hw_handler *hwh = &m->hw_handler;
+	unsigned err_flags = MP_FAIL_PATH | MP_RETRY_IO;	/* Default */
 
 	if (error) {
 		spin_lock(&m->lock);
@@ -677,7 +689,18 @@
 		}
 		spin_unlock(&m->lock);
 
-		fail_path(mpio->path);
+		/* hwh points into m; test the type pointer, not hwh. */
+		if (hwh->type && hwh->type->err)
+			err_flags = hwh->type->err(hwh, bio);
+
+		if (err_flags & MP_FAIL_PATH)
+			fail_path(mpio->path);
+
+		if (err_flags & MP_BYPASS_PG)
+			bypass_pg(m, mpio->path->pg, 1);
+
+		if (!(err_flags & MP_RETRY_IO))
+			return -EIO;
 
 		/* remap */
 		dm_bio_restore(&mpio->details, bio);
@@ -781,9 +804,9 @@
 		goto error;
 
 	if (!strnicmp(argv[0], "disable_group", 14))
-		return bypass_pg(m, argv[1], 0);
+		return bypass_pg_num(m, argv[1], 1);
 	else if (!strnicmp(argv[0], "enable_group", 13))
-		return bypass_pg(m, argv[1], 1);
+		return bypass_pg_num(m, argv[1], 0);
 	else if (!strnicmp(argv[0], "reinstate", 10))
 		action = reinstate_path;
 	else if (!strnicmp(argv[0], "fail", 5))