Change the target->end_io function to take the rw param. Use target->end_io rather than hooking in dm-iostats.c Instead of taking the average of the individual io latencies calculate the 'average end_io time' - 'average start time'. This removes the need for passing context (ie. start time) between the map function and the end_io function. --- diff/drivers/md/dm-iostats.c 2002-12-16 15:22:03.000000000 +0000 +++ source/drivers/md/dm-iostats.c 2002-12-16 15:32:46.000000000 +0000 @@ -12,65 +12,33 @@ #include #include #include +#include -static kmem_cache_t *_iostats_cache; -mempool_t *_iostats_pool; typedef unsigned long jiffies_t; - -struct dm_iostats { - struct dm_target *ti; - - int rw; - jiffies_t start_io; - - void (*end_io) (struct buffer_head * bh, int uptodate); - void *context; -}; - -/* - * io status for a linear range of a device. - */ -typedef uint16_t count_t; -#define MAX_COUNT ((count_t) -1) -#define MIN_IOSTATS 128 #define IOF_LATENCY 1 struct iostats_c { unsigned long flags; struct dm_dev *dev; - atomic_t reads; - atomic_t writes; + atomic_t ios[2]; /* * These fields are only present if we are recording the - * average completion times. + * io latency. */ spinlock_t lock; - jiffies_t total_r; - jiffies_t total_w; - count_t count_r; - count_t count_w; + struct timespec start[2]; + struct timespec end[2]; }; - -static inline struct dm_iostats *alloc_io(void) -{ - return mempool_alloc(_iostats_pool, GFP_NOIO); -} - -static inline void free_io(struct dm_iostats *io) -{ - mempool_free(io, _iostats_pool); -} - /* - * Construct a io status mapping: + * Construct a io status mapping: */ static int iostats_ctr(struct dm_target *ti, int argc, char **argv) { size_t size; - int averaging; + int latency; struct iostats_c *ic; if (argc != 2) { @@ -83,9 +51,9 @@ return -EINVAL; } - averaging = (toupper(argv[1][0]) == 'Y'); + latency = (toupper(argv[1][0]) == 'Y'); - if (averaging) + if (latency) size = sizeof(*ic); else size = (size_t) &((struct iostats_c *) NULL)->lock; @@ -98,7 +66,7 @@ } memset(ic, 0, size); - if (averaging) { + if (latency) { set_bit(IOF_LATENCY, &ic->flags); ic->lock = SPIN_LOCK_UNLOCKED; } @@ -109,8 +77,8 @@ kfree(ic); return -ENXIO; } - atomic_set(&ic->reads, 0); - atomic_set(&ic->writes, 0); + atomic_set(&ic->ios[READ], 0); + atomic_set(&ic->ios[WRITE], 0); return 0; } @@ -122,122 +90,112 @@ kfree(ic); } -/* - * average latency stats end_io function */ -static void iostats_avg_end_io(struct buffer_head *bh, int uptodate) +static void timespec_sum(struct timespec *lhs, struct timespec *rhs) { - int flags; - jiffies_t j; - struct dm_iostats *io = bh->b_private; - struct iostats_c *ic = io->ti->private; - - spin_lock_irqsave(&ic->lock, flags); - - /* average io latency; ignore an io on jiffies overflow */ - if (jiffies >= io->start_io) { - j = jiffies - io->start_io; + lhs->tv_sec += rhs->tv_sec; + lhs->tv_nsec += rhs->tv_nsec; - if (io->rw == WRITE) { - ic->total_w += j; - ic->count_w++; - if (ic->count_w > MAX_COUNT) { - ic->total_w /= ic->count_w; - ic->count_w = 1; - } - } else { - ic->total_r += j; - ic->count_r++; - if (ic->count_r > MAX_COUNT) { - ic->total_r /= ic->count_r; - ic->count_r = 1; - } - } + if (lhs->tv_nsec > 1000000000L) { + lhs->tv_sec++; + lhs->tv_nsec -= 1000000000L; } - - spin_unlock_irqrestore(&ic->lock, flags); - - bh->b_end_io = io->end_io; - bh->b_private = io->context; - - free_io(io); - - bh->b_end_io(bh, uptodate); } -/* - * Hook the io so we can work get the io completion time. - */ -static int _hook_io(struct iostats_c *stats, struct dm_target *ti, - struct buffer_head *bh, int rw) +static void reset_latency(struct iostats_c *ic, int rw) { - struct dm_iostats *io; - - io = alloc_io(); - if (!io) - return -ENOMEM; - - io->ti = ti; - io->rw = rw; - io->start_io = jiffies; - - /* hook the end io request fn */ - io->end_io = bh->b_end_io; - io->context = bh->b_private; - bh->b_end_io = iostats_avg_end_io; - bh->b_private = io; + int flags; - return 0; + spin_lock_irqsave(&ic->lock, flags); + memset(&ic->start[rw], 0, sizeof(struct timespec)); + memset(&ic->end[rw], 0, sizeof(struct timespec)); + spin_unlock_irqrestore(&ic->lock, flags); } /* read/write statistics mapping */ static int iostats_map(struct dm_target *ti, struct buffer_head *bh, int rw) { + int flags; + struct timespec now; struct iostats_c *ic = (struct iostats_c *) ti->private; - /* FIXME: what do we do when these counters wrap ? */ - if (rw == WRITE) - atomic_inc(&ic->writes); + if (!test_bit(IOF_LATENCY, &ic->flags)) + atomic_inc(&ic->ios[rw]); - else - atomic_inc(&ic->reads); + else { + if (atomic_inc_and_test(&ic->ios[rw])) + reset_latency(ic, rw); - if (test_bit(IOF_LATENCY, &ic->flags)) - _hook_io(ic, ti, bh, rw); + jiffies_to_timespec(jiffies, &now); + spin_lock_irqsave(&ic->lock, flags); + timespec_sum(&ic->start[rw], &now); + spin_unlock_irqrestore(&ic->lock, flags); + } /* map */ bh->b_rdev = ic->dev->dev; - return 1; } -static inline uint _calc(jiffies_t j, count_t c) +/* + * If we're measuring io latency, this function adds on the time + * that the io completed. + */ +static void iostats_end_io(struct dm_target *ti, struct buffer_head *bh, + int rw, int error) { - const jiffies_t m = 1000 / HZ; + int flags; + struct timespec now; + struct iostats_c *ic = ti->private; - return (c ? (j * m / c) : (j * m)); + if (test_bit(IOF_LATENCY, &ic->flags)) { + jiffies_to_timespec(jiffies, &now); + + spin_lock_irqsave(&ic->lock, flags); + timespec_sum(&ic->end[rw], &now); + spin_unlock_irqrestore(&ic->lock, flags); + } +} + +/* + * Converts a timespec to milliseconds + */ +static inline unsigned long to_milli(struct timespec *ts) +{ + return (ts->tv_sec * 1000) + (ts->tv_nsec / 1000000); +} + +/* + * Calculates the average latency in milliseconds. + */ +static unsigned long calc_latency(struct iostats_c *ic, int rw) +{ + unsigned long start_total = to_milli(&ic->start[rw]); + unsigned long end_total = to_milli(&ic->end[rw]); + + return (end_total - start_total) / + (unsigned long) atomic_read(&ic->ios[rw]); } -/* read/write statistics mapping */ static int iostats_status(struct dm_target *ti, status_type_t type, - char *result, int maxlen) + char *result, int maxlen) { struct iostats_c *ic = (struct iostats_c *) ti->private; switch (type) { case STATUSTYPE_INFO: - if (test_bit(IOF_LATENCY, &ic->flags)) { + if (!test_bit(IOF_LATENCY, &ic->flags)) { snprintf(result, maxlen, "%s %u %u", kdevname(to_kdev_t(ic->dev->bdev->bd_dev)), - atomic_read(&ic->reads), atomic_read(&ic->writes)); - } else { - uint avg_r, avg_w; + atomic_read(&ic->ios[READ]), + atomic_read(&ic->ios[WRITE])); - avg_r = _calc(ic->total_r, ic->count_r); - avg_w = _calc(ic->total_w, ic->count_w); - snprintf(result, maxlen, "%s %u %u %u %u", + } else { + snprintf(result, maxlen, "%s %u %u %lu %lu", kdevname(to_kdev_t(ic->dev->bdev->bd_dev)), - atomic_read(&ic->reads), atomic_read(&ic->writes), - avg_r, avg_w); + atomic_read(&ic->ios[READ]), + atomic_read(&ic->ios[WRITE]), + calc_latency(ic, READ), + calc_latency(ic, WRITE)); } break; @@ -256,37 +214,13 @@ .ctr = iostats_ctr, .dtr = iostats_dtr, .map = iostats_map, + .end_io = iostats_end_io, .status = iostats_status, }; int __init dm_iostats_init(void) { - int r; - - /* allocate a slab for the dm_iostats */ - _iostats_cache = kmem_cache_create("dm iostats io", - sizeof(struct dm_iostats), 0, 0, - NULL, NULL); - - if (!_iostats_cache) - return -ENOMEM; - - /* Create _iostats_pool mempool */ - _iostats_pool = mempool_create(MIN_IOSTATS, mempool_alloc_slab, - mempool_free_slab, _iostats_cache); - - if (!_iostats_pool) { - kmem_cache_destroy(_iostats_cache); - return -ENOMEM; - } - - r = dm_register_target(&_iostats_target); - if (r) { - mempool_destroy(_iostats_pool); - kmem_cache_destroy(_iostats_cache); - } - - return 0; + return dm_register_target(&_iostats_target); } void __exit dm_iostats_exit(void) @@ -294,16 +228,6 @@ int r = dm_unregister_target(&_iostats_target); if (r) DMERR("dm-iostats unregister failed %d", r); - - if (_iostats_pool) { - mempool_destroy(_iostats_pool); - _iostats_pool = NULL; - } - - if (_iostats_cache) { - kmem_cache_destroy(_iostats_cache); - _iostats_cache = NULL; - } } /* --- diff/drivers/md/dm.c 2002-12-12 17:22:27.000000000 +0000 +++ source/drivers/md/dm.c 2002-12-16 15:34:33.000000000 +0000 @@ -29,6 +29,7 @@ struct mapped_device *md; struct dm_target *ti; + int rw; void (*end_io) (struct buffer_head * bh, int uptodate); void *context; }; @@ -321,7 +322,8 @@ wake_up(&io->md->wait); if (io->ti->type->end_io) - io->ti->type->end_io(io->ti, bh, uptodate ? 0 : -EIO); + io->ti->type->end_io(io->ti, bh, io->rw, + uptodate ? 0 : -EIO); } bh->b_end_io = io->end_io; @@ -352,6 +354,7 @@ atomic_inc(&md->pending); io->md = md; io->ti = ti; + io->rw = rw; io->end_io = bh->b_end_io; io->context = bh->b_private; bh->b_end_io = dec_pending; --- diff/include/linux/device-mapper.h 2002-12-12 17:17:48.000000000 +0000 +++ source/include/linux/device-mapper.h 2002-12-16 15:33:29.000000000 +0000 @@ -34,8 +34,8 @@ * > 0: simple remap complete */ typedef int (*dm_map_fn) (struct dm_target *ti, struct buffer_head *bh, int rw); -typedef int (*dm_endio_fn) (struct dm_target *ti, - struct buffer_head *bh, int error); +typedef void (*dm_endio_fn) (struct dm_target *ti, + struct buffer_head *bh, int rw, int error); typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, char *result, int maxlen);