dm snapshot: replace sibling_count with ref_count on pending exceptions

Rename pending_exception.sibling_count to ref_count and route reference
handling through two new helpers, get_pending_exception() and
put_pending_exception().  put_pending_exception() drops the reference(s)
under s->pe_lock, unhooks and frees any pending exception whose count
reaches zero, and returns the origin bios for the caller to flush.
pending_complete() now collects the snapshot bios before dropping its
reference, and __find_pending_exception() holds s->pe_lock around its
lookup and insert, releasing it only while allocating.

NOTE(review): s->pe_lock is not declared or initialised anywhere in this
patch - confirm that another patch in the series adds it to struct
dm_snapshot.

Index: linux-2.6.17/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.17.orig/drivers/md/dm-snap.c	2006-08-07 16:06:03.000000000 +0100
+++ linux-2.6.17/drivers/md/dm-snap.c	2006-08-07 19:50:18.000000000 +0100
@@ -60,20 +60,20 @@ struct pending_exception {
 
 	/*
 	 * The primary pending_exception is the one that holds
-	 * the sibling_count and the list of origin_bios for a
+	 * the ref_count and the list of origin_bios for a
 	 * group of pending_exceptions. It is always last to get freed.
 	 * These fields get set up when writing to the origin.
 	 */
 	struct pending_exception *primary_pe;
 
 	/*
-	 * Number of pending_exceptions processing this chunk.
+	 * Number of exception copies or snapshot reads processing this chunk.
 	 * When this drops to zero we must complete the origin bios.
 	 * If incrementing or decrementing this, hold pe->snap->lock for
 	 * the sibling concerned and not pe->primary_pe->snap->lock unless
 	 * they are the same.
 	 */
-	atomic_t sibling_count;
+	atomic_t ref_count;
 
 	/* Pointer back to snapshot context */
 	struct dm_snapshot *snap;
@@ -654,12 +654,58 @@ static void __invalidate_snapshot(struct
 	dm_table_event(s->table);
 }
 
+static void get_pending_exception(struct pending_exception *pe)
+{
+	atomic_inc(&pe->ref_count);
+
+	if (pe->primary_pe && pe->primary_pe != pe)
+		atomic_inc(&pe->primary_pe->ref_count);
+}
+
+static struct bio *put_pending_exception(struct pending_exception *pe)
+{
+	struct dm_snapshot *s = pe->snap;
+	struct pending_exception *primary_pe;
+	struct bio *origin_bios = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&s->pe_lock, flags);
+
+	primary_pe = pe->primary_pe;
+
+	/*
+	 * Free the pe if it's not linked to an origin write or if
+	 * it's not itself a primary pe.
+	 */
+	if ((!primary_pe || primary_pe != pe) &&
+	    atomic_dec_and_test(&pe->ref_count)) {
+		remove_exception(&pe->e);
+		free_pending_exception(pe);
+	}
+
+	/*
+	 * If this pe is involved in a write to the origin and
+	 * it is the last sibling to complete then release
+	 * the bios for the original write to the origin.
+	 */
+	if (primary_pe && atomic_dec_and_test(&primary_pe->ref_count)) {
+		origin_bios = bio_list_get(&primary_pe->origin_bios);
+		remove_exception(&primary_pe->e);
+		free_pending_exception(primary_pe);
+	}
+
+	spin_unlock_irqrestore(&s->pe_lock, flags);
+
+	return origin_bios;
+}
+
 static void pending_complete(struct pending_exception *pe, int success)
 {
 	struct exception *e;
 	struct pending_exception *primary_pe;
 	struct dm_snapshot *s = pe->snap;
-	struct bio *flush = NULL;
+	struct bio *origin_bios = NULL;
+	struct bio *snapshot_bios = NULL;
 	int error = 0;
 
 	if (!success) {
@@ -691,43 +737,20 @@ static void pending_complete(struct pend
 	 * in-flight exception from the list.
 	 */
 	insert_exception(&s->complete, e);
-	remove_exception(&pe->e);
 
  out:
-	primary_pe = pe->primary_pe;
-
-	/*
-	 * If this pe is involved in a write to the origin and
-	 * it is the last sibling to complete then release
-	 * the bios for the original write to the origin.
-	 */
-	if (primary_pe &&
-	    atomic_dec_and_test(&primary_pe->sibling_count))
-		flush = bio_list_get(&primary_pe->origin_bios);
+	snapshot_bios = bio_list_get(&pe->snapshot_bios);
+	origin_bios = put_pending_exception(pe);
 
 	up_write(&s->lock);
 
 	/* Submit any pending write bios */
 	if (!error)
-		flush_bios(bio_list_get(&pe->snapshot_bios));
+		flush_bios(snapshot_bios);
 	else
-		error_bios(bio_list_get(&pe->snapshot_bios));
+		error_bios(snapshot_bios);
 
-	/*
-	 * Free the pe if it's not linked to an origin write or if
-	 * it's not itself a primary pe.
-	 */
-	if (!primary_pe || primary_pe != pe)
-		free_pending_exception(pe);
-
-	/*
-	 * Free the primary pe if nothing references it.
-	 */
-	if (primary_pe && !atomic_read(&primary_pe->sibling_count))
-		free_pending_exception(primary_pe);
-
-	if (flush)
-		flush_bios(flush);
+	flush_bios(origin_bios);
 }
 
 static void commit_callback(void *context, int success)
@@ -793,6 +816,9 @@ __find_pending_exception(struct dm_snaps
 	struct exception *e;
 	struct pending_exception *pe;
 	chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
+	unsigned long flags;
+
+	spin_lock_irqsave(&s->pe_lock, flags);
 
 	/*
 	 * Is there a pending exception for this already ?
@@ -808,13 +834,16 @@ __find_pending_exception(struct dm_snaps
 	 * Create a new pending exception, we don't want
 	 * to hold the lock while we do this.
 	 */
+	spin_unlock_irqrestore(&s->pe_lock, flags);
 	up_write(&s->lock);
 	pe = alloc_pending_exception();
 	down_write(&s->lock);
+	spin_lock_irqsave(&s->pe_lock, flags);
 
 	if (!s->valid) {
 		free_pending_exception(pe);
-		return NULL;
+		pe = NULL;
+		goto out;
 	}
 
 	e = lookup_exception(&s->pending, chunk);
@@ -828,18 +857,22 @@ __find_pending_exception(struct dm_snaps
 	bio_list_init(&pe->origin_bios);
 	bio_list_init(&pe->snapshot_bios);
 	pe->primary_pe = NULL;
-	atomic_set(&pe->sibling_count, 1);
+	atomic_set(&pe->ref_count, 0);
 	pe->snap = s;
 	pe->started = 0;
 
 	if (s->store.prepare_exception(&s->store, &pe->e)) {
 		free_pending_exception(pe);
-		return NULL;
+		pe = NULL;
+		goto out;
 	}
 
+	get_pending_exception(pe);
+
 	insert_exception(&s->pending, &pe->e);
 
  out:
+	spin_unlock_irqrestore(&s->pe_lock, flags);
 	return pe;
 }
 
@@ -1011,7 +1044,7 @@ static int __origin_write(struct list_he
 		 * is already remapped in this snapshot
 		 * and trigger an exception if not.
 		 *
-		 * sibling_count is initialised to 1 so pending_complete()
+		 * ref_count is initialised to 1 so pending_complete()
 		 * won't destroy the primary_pe while we're inside this loop.
 		 */
 		e = lookup_exception(&snap->complete, chunk);
@@ -1036,14 +1069,18 @@ static int __origin_write(struct list_he
 				first = 1;
 			}
 
+			get_pending_exception(primary_pe);
+
 			bio_list_add(&primary_pe->origin_bios, bio);
 
 			r = 0;
 		}
 
 		if (!pe->primary_pe) {
-			atomic_inc(&primary_pe->sibling_count);
 			pe->primary_pe = primary_pe;
+			if (primary_pe != pe)
+				atomic_add(atomic_read(&pe->ref_count),
+					   &primary_pe->ref_count);
 		}
 
 		if (!pe->started) {
@@ -1056,20 +1093,20 @@ static int __origin_write(struct list_he
 	}
 
 	if (!primary_pe)
-		goto out;
+		return r;
 
 	/*
 	 * If this is the first time we're processing this chunk and
-	 * sibling_count is now 1 it means all the pending exceptions
+	 * ref_count is now 1 it means all the pending exceptions
 	 * got completed while we were in the loop above, so it falls to
 	 * us here to remove the primary_pe and submit any origin_bios.
 	 */
 
-	if (first && atomic_dec_and_test(&primary_pe->sibling_count)) {
+	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
 		flush_bios(bio_list_get(&primary_pe->origin_bios));
 		free_pending_exception(primary_pe);
 		/* If we got here, pe_queue is necessarily empty. */
-		goto out;
+		return r;
 	}
 
 	/*
@@ -1078,7 +1115,6 @@ static int __origin_write(struct list_he
 	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
 		start_copy(pe);
 
- out:
 	return r;
 }
 