When devices are stacked, one device's merge_bvec_fn may need to perform the mapping and then call one or more functions for its underlying devices. The following bio fields are used: bio->bi_sector, bio->bi_bdev, bio->bi_size, and bio->bi_rw (via bio_data_dir()). This patch passes in copies of those fields to avoid having to change them directly in the struct bio when going down the stack, only to have to change them back again on the way back up the stack. (And then when the bio gets mapped for real, the whole exercise gets repeated, but that's a problem for another day...) Cc: Neil Brown Cc: Jens Axboe --- drivers/md/linear.c | 14 ++++++++++---- drivers/md/raid0.c | 14 ++++++++++---- drivers/md/raid10.c | 19 ++++++++++++------- drivers/md/raid5.c | 11 +++++++---- fs/bio.c | 7 +++++-- include/linux/blkdev.h | 4 +++- 6 files changed, 47 insertions(+), 22 deletions(-) Index: linux-2.6.21-rc5/drivers/md/linear.c =================================================================== --- linux-2.6.21-rc5.orig/drivers/md/linear.c 2007-04-04 19:26:48.000000000 +0100 +++ linux-2.6.21-rc5/drivers/md/linear.c 2007-04-04 19:28:49.000000000 +0100 @@ -50,17 +50,23 @@ static inline dev_info_t *which_dev(mdde /** * linear_mergeable_bvec -- tell bio layer if two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bi_bdev: the block device to be used for the I/O + * @bi_sector: the sector where the I/O will start + * @bi_size: bi_size of new bio + * @bi_rw: bi_rw of new bio * @biovec: the request that could be merged to it.
* * Return amount of bytes we can take at this offset */ -static int linear_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +static int linear_mergeable_bvec(request_queue_t *q, + struct block_device *bi_bdev, + sector_t bi_sector, unsigned bi_size, + unsigned long bi_rw, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; dev_info_t *dev0; - unsigned long maxsectors, bio_sectors = bio->bi_size >> 9; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + unsigned long maxsectors, bio_sectors = bi_size >> 9; + sector_t sector = bi_sector + get_start_sect(bi_bdev); dev0 = which_dev(mddev, sector); maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); Index: linux-2.6.21-rc5/drivers/md/raid0.c =================================================================== --- linux-2.6.21-rc5.orig/drivers/md/raid0.c 2007-04-04 19:26:48.000000000 +0100 +++ linux-2.6.21-rc5/drivers/md/raid0.c 2007-04-04 19:28:49.000000000 +0100 @@ -263,18 +263,24 @@ static int create_strip_zones (mddev_t * /** * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bi_bdev: the block device to be used for the I/O + * @bi_sector: the sector where the I/O will start + * @bi_size: bi_size of new bio + * @bi_rw: bi_rw of new bio * @biovec: the request that could be merged to it. 
* * Return amount of bytes we can accept at this offset */ -static int raid0_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +static int raid0_mergeable_bvec(request_queue_t *q, + struct block_device *bi_bdev, + sector_t bi_sector, unsigned bi_size, + unsigned long bi_rw, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bi_sector + get_start_sect(bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ Index: linux-2.6.21-rc5/drivers/md/raid10.c =================================================================== --- linux-2.6.21-rc5.orig/drivers/md/raid10.c 2007-04-04 19:26:48.000000000 +0100 +++ linux-2.6.21-rc5/drivers/md/raid10.c 2007-04-04 19:28:49.000000000 +0100 @@ -446,26 +446,31 @@ static sector_t raid10_find_virt(conf_t /** * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bi_bdev: the block device to be used for the I/O + * @bi_sector: the sector where the I/O will start + * @bi_size: bi_size of new bio + * @bi_rw: bi_rw of new bio * @biovec: the request that could be merged to it. * * Return amount of bytes we can accept at this offset * If near_copies == raid_disk, there are no striping issues, * but in that case, the function isn't called at all. 
*/ -static int raid10_mergeable_bvec(request_queue_t *q, struct bio *bio, - struct bio_vec *bio_vec) +static int raid10_mergeable_bvec(request_queue_t *q, + struct block_device *bi_bdev, + sector_t bi_sector, unsigned bi_size, + unsigned long bi_rw, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bi_sector + get_start_sect(bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ - if (max <= bio_vec->bv_len && bio_sectors == 0) - return bio_vec->bv_len; + if (max <= biovec->bv_len && bio_sectors == 0) + return biovec->bv_len; else return max; } Index: linux-2.6.21-rc5/drivers/md/raid5.c =================================================================== --- linux-2.6.21-rc5.orig/drivers/md/raid5.c 2007-04-04 19:26:48.000000000 +0100 +++ linux-2.6.21-rc5/drivers/md/raid5.c 2007-04-04 19:28:49.000000000 +0100 @@ -2636,15 +2636,18 @@ static int raid5_congested(void *data, i /* We want read requests to align with chunks where possible, * but write requests don't need to. 
*/ -static int raid5_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +static int raid5_mergeable_bvec(request_queue_t *q, + struct block_device *bi_bdev, + sector_t bi_sector, unsigned bi_size, + unsigned long bi_rw, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bi_sector + get_start_sect(bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bi_size >> 9; - if (bio_data_dir(bio) == WRITE) + if ((bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; Index: linux-2.6.21-rc5/fs/bio.c =================================================================== --- linux-2.6.21-rc5.orig/fs/bio.c 2007-04-04 19:26:48.000000000 +0100 +++ linux-2.6.21-rc5/fs/bio.c 2007-04-04 19:28:49.000000000 +0100 @@ -343,7 +343,9 @@ static int __bio_add_page(request_queue_ offset == prev->bv_offset + prev->bv_len) { prev->bv_len += len; if (q->merge_bvec_fn && - q->merge_bvec_fn(q, bio, prev) < len) { + q->merge_bvec_fn(q, bio->bi_bdev, bio->bi_sector, + bio->bi_size, bio->bi_rw, + prev) < len) { prev->bv_len -= len; return 0; } @@ -390,7 +392,8 @@ static int __bio_add_page(request_queue_ * merge_bvec_fn() returns number of bytes it can accept * at this offset */ - if (q->merge_bvec_fn(q, bio, bvec) < len) { + if (q->merge_bvec_fn(q, bio->bi_bdev, bio->bi_sector, + bio->bi_size, bio->bi_rw, bvec) < len) { bvec->bv_page = NULL; bvec->bv_len = 0; bvec->bv_offset = 0; Index: linux-2.6.21-rc5/include/linux/blkdev.h =================================================================== --- linux-2.6.21-rc5.orig/include/linux/blkdev.h 2007-04-04 19:26:48.000000000 +0100 +++ linux-2.6.21-rc5/include/linux/blkdev.h 2007-04-04 19:28:49.000000000 +0100 @@ -336,7 +336,9 @@ typedef int 
(prep_rq_fn) (request_queue_ typedef void (unplug_fn) (request_queue_t *); struct bio_vec; -typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); +typedef int (merge_bvec_fn) (request_queue_t *, struct block_device *bi_bdev, + sector_t bi_sector, unsigned bi_size, + unsigned long bi_rw, struct bio_vec *); typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); typedef void (prepare_flush_fn) (request_queue_t *, struct request *); typedef void (softirq_done_fn)(struct request *);