Index: linux-2.6.14-rc2/drivers/md/dm-snap.c =================================================================== --- linux-2.6.14-rc2.orig/drivers/md/dm-snap.c 2006-01-09 15:42:33.000000000 +0000 +++ linux-2.6.14-rc2/drivers/md/dm-snap.c 2006-01-10 23:32:04.000000000 +0000 @@ -40,7 +40,7 @@ #define SNAPSHOT_PAGES 256 struct workqueue_struct *ksnapd; -static void process_snapshot_bios(void *data); +static void process_queued_bios(void *data); struct pending_exception { struct exception e; @@ -53,6 +53,11 @@ struct pending_exception { struct bio_list snapshot_bios; /* + * Short-term queue of pending exceptions prior to submission. + */ + struct list_head list; + + /* * Other pending_exceptions that are processing this * chunk. When this list is empty, we know we can * complete the origins. @@ -97,6 +102,9 @@ struct origin { /* List of snapshots for this origin */ struct list_head snapshots; + + /* Locks changes to siblings and origin_bios in pending exceptions */ + spinlock_t sibling_lock; }; /* @@ -177,11 +185,13 @@ static int register_snapshot(struct dm_s /* Initialise the struct */ INIT_LIST_HEAD(&o->snapshots); + spin_lock_init(&o->sibling_lock); o->bdev = bdev; __insert_origin(o); } + snap->originp = o; list_add_tail(&snap->list, &o->snapshots); up_write(&_origins_lock); @@ -525,7 +535,7 @@ static int snapshot_ctr(struct dm_target /* Metadata must only be loaded into one table at once */ read_snapshot_metadata(s); - INIT_WORK(&s->process_snapshot_bios, process_snapshot_bios, s); + INIT_WORK(&s->process_queued_bios, process_queued_bios, s); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ @@ -596,17 +606,17 @@ static void flush_bios(struct bio *bio) } } -static void process_snapshot_bios(void *data) +static void process_queued_bios(void *data) { struct dm_snapshot *s = (struct dm_snapshot *) data; - struct bio *snapshot_bios; + struct bio *queued_bios; unsigned long flags; 
spin_lock_irqsave(&s->pe_lock, flags); - snapshot_bios = bio_list_get(&s->snapshot_bios); + queued_bios = bio_list_get(&s->queued_bios); spin_unlock_irqrestore(&s->pe_lock, flags); - flush_bios(snapshot_bios); + flush_bios(queued_bios); } /* @@ -624,24 +634,19 @@ static void error_bios(struct bio *bio) } } -static struct bio *__flush_bios(struct pending_exception *pe) +static void __merge_origin_bios(struct pending_exception *pe) { struct pending_exception *sibling; if (list_empty(&pe->siblings)) - return bio_list_get(&pe->origin_bios); + return; sibling = list_entry(pe->siblings.next, struct pending_exception, siblings); list_del(&pe->siblings); - /* This is fine as long as kcopyd is single-threaded. If kcopyd - * becomes multi-threaded, we'll need some locking here. - */ bio_list_merge(&sibling->origin_bios, &pe->origin_bios); - - return NULL; } static void pending_complete(struct pending_exception *pe, int success) @@ -659,7 +664,11 @@ static void pending_complete(struct pend down_write(&s->lock); s->store.drop_snapshot(&s->store); s->valid = 0; - flush = __flush_bios(pe); + spin_lock_irqsave(&s->originp->sibling_lock, flags); + __merge_origin_bios(pe); + if (list_empty(&pe->siblings)) + flush = bio_list_get(&pe->origin_bios); + spin_unlock_irqrestore(&s->originp->sibling_lock, flags); up_write(&s->lock); error_bios(bio_list_get(&pe->snapshot_bios)); @@ -673,23 +682,28 @@ static void pending_complete(struct pend */ down_write(&s->lock); insert_exception(&s->complete, e); - remove_exception(&pe->e); - flush = __flush_bios(pe); - /* Submit any pending write bios */ - up_write(&s->lock); + spin_lock_irqsave(&s->pe_lock, flags); + remove_exception(&pe->e); + spin_lock(&s->originp->sibling_lock); + __merge_origin_bios(pe); /* * We must wait for any outstanding snapshot reads to complete * before we can change the origin.
*/ - spin_lock_irqsave(&s->pe_lock, flags); if (pe->read_count) { pe->flush_required = 1; free_pe = 0; - } + } else if (list_empty(&pe->siblings)) + flush = bio_list_get(&pe->origin_bios); + + spin_unlock(&s->originp->sibling_lock); spin_unlock_irqrestore(&s->pe_lock, flags); + up_write(&s->lock); + + /* Submit any pending write bios */ if (free_pe) flush_bios(bio_list_get(&pe->snapshot_bios)); } else { @@ -700,7 +714,11 @@ static void pending_complete(struct pend s->store.drop_snapshot(&s->store); s->valid = 0; remove_exception(&pe->e); - flush = __flush_bios(pe); + spin_lock_irqsave(&s->originp->sibling_lock, flags); + __merge_origin_bios(pe); + if (list_empty(&pe->siblings)) + flush = bio_list_get(&pe->origin_bios); + spin_unlock_irqrestore(&s->originp->sibling_lock, flags); up_write(&s->lock); error_bios(bio_list_get(&pe->snapshot_bios)); @@ -929,6 +947,7 @@ static void snapshot_end_io(struct dm_ta struct pending_exception *pe = (struct pending_exception *) map_context->ptr; unsigned long flags; + int free_pe = 0; if (bio_rw(bio) == WRITE) return error; @@ -936,16 +955,25 @@ static void snapshot_end_io(struct dm_ta spin_lock_irqsave(&s->pe_lock, flags); if (!--pe->read_count) { if (pe->flush_required) { - bio_list_merge(&s->snapshot_bios, &pe->snapshot_bios); - - queue_work(ksnapd, &process_snapshot_bios); - } else if (!pe->write_count) + bio_list_merge(&s->queued_bios, &pe->snapshot_bios); + /* The last sibling releases origin bios too */ + spin_lock(&s->originp->sibling_lock); + if (list_empty(&pe->siblings)) + bio_list_merge(&s->queued_bios, + &pe->origin_bios); + spin_unlock(&s->originp->sibling_lock); + queue_work(ksnapd, &s->process_queued_bios); + free_pe = 1; + } else if (!pe->write_count) { /* No conflicting writes so we remove the pe.
*/ remove_exception(&pe->e); + free_pe = 1; + } } spin_unlock_irqrestore(&s->pe_lock, flags); - free_pending_exception(pe); + if (free_pe) + free_pending_exception(pe); return error; } @@ -1022,6 +1052,8 @@ static int __origin_write(struct list_he struct exception *e; struct pending_exception *pe, *last = NULL; chunk_t chunk; + LIST_HEAD(pe_queue); + unsigned long flags; /* Do all the snapshots on this origin */ list_for_each_entry (snap, snapshots, list) { @@ -1055,11 +1087,28 @@ static int __origin_write(struct list_he snap->valid = 0; } else { - if (last) + /* FIXME: Need to guarantee 'last' still valid */ + /* Ensure pe and last are joined as siblings */ + spin_lock_irqsave(&snap->originp->sibling_lock, flags); + if (last && list_empty(&pe->siblings)) list_merge(&pe->siblings, &last->siblings); - last = pe; + + last = pe; + if (first) { + bio_list_add(&pe->origin_bios, bio); + first = 0; + } + spin_unlock_irqrestore(&snap->originp->sibling_lock, flags); + + spin_lock_irqsave(&snap->pe_lock, flags); + if (!pe->started) { + pe->started = 1; + list_add_tail(&pe->list, &pe_queue); + } + spin_unlock_irqrestore(&snap->pe_lock, flags); + r = 0; } } @@ -1070,24 +1119,8 @@ static int __origin_write(struct list_he /* * Now that we have a complete pe list we can start the copying.
*/ - if (last) { - pe = last; - do { - down_write(&pe->snap->lock); - if (first) - bio_list_add(&pe->origin_bios, bio); - if (!pe->started) { - pe->started = 1; - up_write(&pe->snap->lock); - start_copy(pe); - } else - up_write(&pe->snap->lock); - first = 0; - pe = list_entry(pe->siblings.next, - struct pending_exception, siblings); - - } while (pe != last); - } + list_for_each_entry(pe, &pe_queue, list) + start_copy(pe); return r; } Index: linux-2.6.14-rc2/drivers/md/dm-snap.h =================================================================== --- linux-2.6.14-rc2.orig/drivers/md/dm-snap.h 2006-01-09 15:42:27.000000000 +0000 +++ linux-2.6.14-rc2/drivers/md/dm-snap.h 2006-01-10 18:57:30.000000000 +0000 @@ -94,6 +94,8 @@ struct dm_snapshot { /* List of snapshots per Origin */ struct list_head list; + struct origin *originp; + /* Size of data blocks saved - must be a power of 2 */ chunk_t chunk_size; chunk_t chunk_mask; @@ -125,8 +127,8 @@ struct dm_snapshot { struct kcopyd_client *kcopyd_client; - struct work_struct process_snapshot_bios; - struct bio_list snapshot_bios; + struct work_struct process_queued_bios; + struct bio_list queued_bios; }; /*