--- diff/drivers/md/dm-log.c	2004-01-05 15:58:39.000000000 +0000
+++ source/drivers/md/dm-log.c	2004-01-07 10:26:43.000000000 +0000
@@ -553,7 +553,6 @@
 	.get_region_size = core_get_region_size,
 	.is_clean = core_is_clean,
 	.in_sync = core_in_sync,
-	.flush = core_flush,
 	.mark_region = core_mark_region,
 	.clear_region = core_clear_region,
 	.get_resync_work = core_get_resync_work,
@@ -568,7 +567,6 @@
 	.get_region_size = core_get_region_size,
 	.is_clean = core_is_clean,
 	.in_sync = core_in_sync,
-	.flush = disk_flush,
 	.mark_region = core_mark_region,
 	.clear_region = core_clear_region,
 	.get_resync_work = core_get_resync_work,
--- diff/drivers/md/dm-log.h	2003-12-29 10:17:03.000000000 +0000
+++ source/drivers/md/dm-log.h	2004-01-07 10:41:11.000000000 +0000
@@ -18,6 +18,11 @@
 	void *context;
 };
 
+struct region_list {
+	region_t key;
+	struct list_head list;
+};
+
 struct dirty_log_type {
 	struct list_head list;
 	const char *name;
@@ -29,8 +34,17 @@
 	void (*dtr)(struct dirty_log *log);
 
 	/*
+	 * To avoid proliferation of kernel threads, we provide
+	 * this function which should be called periodically by
+	 * the client.
+	 */
+	void (*do_work)(struct dirty_log *log);
+
+	/*
 	 * There are times when we don't want the log to touch
-	 * the disk.
+	 * the disk.  Someone else may touch the log while it is
+	 * suspended, so the resume method should reread the log
+	 * from disk/network etc.
 	 */
 	int (*suspend)(struct dirty_log *log);
 	int (*resume)(struct dirty_log *log);
@@ -41,12 +55,6 @@
 	 */
 	sector_t (*get_region_size)(struct dirty_log *log);
 
-	/*
-	 * A predicate to say whether a region is clean or not.
-	 * May block.
-	 */
-	int (*is_clean)(struct dirty_log *log, region_t region);
-
 	/*
 	 * Returns: 0, 1, -EWOULDBLOCK, < 0
 	 *
@@ -61,18 +69,21 @@
 	int (*in_sync)(struct dirty_log *log, region_t region, int can_block);
 
 	/*
-	 * Flush the current log state (eg, to disk).  This
-	 * function may block.
-	 */
-	int (*flush)(struct dirty_log *log);
+	 * Mark an area as dirty.  The log will use the callback
+	 * when the operation is complete.  By using a callback
+	 * we are also allowing the log a lot more freedom, for
+	 * example 'working set' or 'delayed commit' algorithms
+	 * become trivial to implement.  All the log promises is
+	 * that _eventually_ the callback will be made.
+	 */
+	void (*mark_region)(struct dirty_log *log, struct region_list *rl,
+			    void (*callback)(int, struct region_list *));
 
 	/*
-	 * Mark an area as clean or dirty.  These functions may
-	 * block, though for performance reasons blocking should
-	 * be extremely rare (eg, allocating another chunk of
-	 * memory for some reason).
+	 * Mark an area as clean; no callback is needed since we
+	 * really don't care if a clean region is accidentally
+	 * considered dirty after a crash.
 	 */
-	void (*mark_region)(struct dirty_log *log, region_t region);
 	void (*clear_region)(struct dirty_log *log, region_t region);
 
 	/*
--- diff/drivers/md/dm-raid1.c	2004-01-05 14:16:49.000000000 +0000
+++ source/drivers/md/dm-raid1.c	2004-01-07 13:07:40.000000000 +0000
@@ -132,16 +132,25 @@
 
 struct region {
 	struct region_hash *rh;	/* FIXME: can we get rid of this ? */
-	region_t key;
 	int state;
+	struct region_list rl;
 
 	struct list_head hash_list;
-	struct list_head list;
 
 	atomic_t pending;
 	struct bio *delayed_bios;
 };
 
+static inline struct region *rl_to_region(struct region_list *rl)
+{
+	return container_of(rl, struct region, rl);
+}
+
+static inline struct region *l_to_region(struct list_head *l)
+{
+	return rl_to_region(container_of(l, struct region_list, list));
+}
+
 /*
  * Conversion fns
  */
@@ -254,7 +263,7 @@
 	struct region *reg;
 
 	list_for_each_entry (reg, rh->buckets + rh_hash(rh, region), hash_list)
-		if (reg->key == region)
+		if (reg->rl.key == region)
 			return reg;
 
 	return NULL;
@@ -262,7 +271,7 @@
 
 static void __rh_insert(struct region_hash *rh, struct region *reg)
 {
-	unsigned int h = rh_hash(rh, reg->key);
+	unsigned int h = rh_hash(rh, reg->rl.key);
 	list_add(&reg->hash_list, rh->buckets + h);
 }
 
@@ -275,9 +284,8 @@
 	nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
 		RH_CLEAN : RH_NOSYNC;
 	nreg->rh = rh;
-	nreg->key = region;
-
-	INIT_LIST_HEAD(&nreg->list);
+	nreg->rl.key = region;
+	INIT_LIST_HEAD(&nreg->rl.list);
 	atomic_set(&nreg->pending, 0);
 	nreg->delayed_bios = NULL;
 
@@ -292,7 +300,7 @@
 		__rh_insert(rh, nreg);
 		if (nreg->state == RH_CLEAN) {
 			spin_lock_irq(&rh->region_lock);
-			list_add(&nreg->list, &rh->clean_regions);
+			list_add(&nreg->rl.list, &rh->clean_regions);
 			spin_unlock_irq(&rh->region_lock);
 		}
 		reg = nreg;
@@ -374,8 +382,9 @@
 		list_splice(&rh->clean_regions, &clean);
 		INIT_LIST_HEAD(&rh->clean_regions);
 
-		list_for_each_entry (reg, &clean, list) {
-			rh->log->type->clear_region(rh->log, reg->key);
+		list_for_each (tmp, &clean) {
+			reg = l_to_region(tmp);
+			rh->log->type->clear_region(rh->log, reg->rl.key);
 			list_del(&reg->hash_list);
 		}
 	}
@@ -384,8 +393,10 @@
 		list_splice(&rh->recovered_regions, &recovered);
 		INIT_LIST_HEAD(&rh->recovered_regions);
 
-		list_for_each_entry (reg, &recovered, list)
+		list_for_each (tmp, &recovered) {
+			reg = l_to_region(tmp);
 			list_del(&reg->hash_list);
+		}
 	}
 	spin_unlock(&rh->region_lock);
 	write_unlock_irq(&rh->hash_lock);
@@ -396,20 +407,31 @@
 	 * any more locking.
	 */
 	list_for_each_safe (tmp, tmp2, &recovered) {
-		reg = list_entry(tmp, struct region, list);
+		reg = l_to_region(tmp);
 
-		rh->log->type->complete_resync_work(rh->log, reg->key, 1);
+		rh->log->type->complete_resync_work(rh->log, reg->rl.key, 1);
 		dispatch_bios(rh->ms, reg->delayed_bios);
 		up(&rh->recovery_count);
 		mempool_free(reg, rh->region_pool);
 	}
 
 	list_for_each_safe (tmp, tmp2, &clean) {
-		reg = list_entry(tmp, struct region, list);
+		reg = l_to_region(tmp);
 		mempool_free(reg, rh->region_pool);
 	}
 }
 
+static void rh_mark_callback(int error, struct region_list *rl)
+{
+	struct region *reg = rl_to_region(rl);
+	struct region_hash *rh = reg->rh;
+
+	spin_lock_irq(&rh->region_lock);
+	reg->state = RH_DIRTY;
+	list_del_init(&reg->rl.list);	/* take off the clean list */
+	spin_unlock_irq(&rh->region_lock);
+}
+
 static void rh_inc(struct region_hash *rh, region_t region)
 {
 	struct region *reg;
@@ -417,11 +439,11 @@
 	read_lock(&rh->hash_lock);
 	reg = __rh_find(rh, region);
 	if (reg->state == RH_CLEAN) {
-		rh->log->type->mark_region(rh->log, reg->key);
+		rh->log->type->mark_region(rh->log, &reg->rl, rh_mark_callback);
 
 		spin_lock_irq(&rh->region_lock);
 		reg->state = RH_DIRTY;
-		list_del_init(&reg->list);	/* take off the clean list */
+		list_del_init(&reg->rl.list);	/* take off the clean list */
 		spin_unlock_irq(&rh->region_lock);
 	}
 
@@ -450,10 +472,10 @@
 	if (atomic_dec_and_test(&reg->pending)) {
 		spin_lock_irqsave(&rh->region_lock, flags);
 		if (reg->state == RH_RECOVERING) {
-			list_add_tail(&reg->list, &rh->quiesced_regions);
+			list_add_tail(&reg->rl.list, &rh->quiesced_regions);
 		} else {
 			reg->state = RH_CLEAN;
-			list_add(&reg->list, &rh->clean_regions);
+			list_add(&reg->rl.list, &rh->clean_regions);
 		}
 		spin_unlock_irqrestore(&rh->region_lock, flags);
 		wake = 1;
@@ -492,11 +514,11 @@
 
 	/* Already quiesced ? */
 	if (atomic_read(&reg->pending))
-		list_del_init(&reg->list);
+		list_del_init(&reg->rl.list);
 
 	else {
-		list_del_init(&reg->list);
-		list_add(&reg->list, &rh->quiesced_regions);
+		list_del_init(&reg->rl.list);
+		list_add(&reg->rl.list, &rh->quiesced_regions);
 	}
 
 	spin_unlock_irq(&rh->region_lock);
@@ -521,9 +543,8 @@
 
 	spin_lock_irq(&rh->region_lock);
 	if (!list_empty(&rh->quiesced_regions)) {
-		reg = list_entry(rh->quiesced_regions.next,
-				 struct region, list);
-		list_del_init(&reg->list);	/* remove from the quiesced list */
+		reg = l_to_region(rh->quiesced_regions.next);
+		list_del_init(&reg->rl.list);	/* remove from the quiesced list */
 	}
 	spin_unlock_irq(&rh->region_lock);
 
@@ -536,17 +557,12 @@
 	struct region_hash *rh = reg->rh;
 
 	spin_lock_irq(&rh->region_lock);
-	list_add(&reg->list, &reg->rh->recovered_regions);
+	list_add(&reg->rl.list, &reg->rh->recovered_regions);
 	spin_unlock_irq(&rh->region_lock);
 
 	dm_daemon_wake(&_kmirrord);
 }
 
-static void rh_flush(struct region_hash *rh)
-{
-	rh->log->type->flush(rh->log);
-}
-
 static void rh_delay(struct region_hash *rh, struct bio *bio)
 {
 	struct region *reg;
@@ -657,8 +673,8 @@
 	/* fill in the source */
 	m = ms->mirror + DEFAULT_MIRROR;
 	from.bdev = m->dev->bdev;
-	from.sector = m->offset + region_to_sector(reg->rh, reg->key);
-	if (reg->key == (ms->nr_regions - 1)) {
+	from.sector = m->offset + region_to_sector(reg->rh, reg->rl.key);
+	if (reg->rl.key == (ms->nr_regions - 1)) {
 		/*
 		 * The final region may be smaller than
 		 * region_size.
@@ -676,7 +692,7 @@
 
 		m = ms->mirror + i;
 		dest->bdev = m->dev->bdev;
-		dest->sector = m->offset + region_to_sector(reg->rh, reg->key);
+		dest->sector = m->offset + region_to_sector(reg->rh, reg->rl.key);
 		dest->count = from.count;
 		dest++;
 	}
@@ -880,6 +896,7 @@
 static void do_mirror(struct mirror_set *ms)
 {
 	struct bio_list reads, writes;
+	struct dirty_log *log = ms->rh.log;
 
 	spin_lock(&ms->lock);
 	memcpy(&reads, &ms->reads, sizeof(reads));
@@ -892,6 +909,11 @@
 	do_recovery(ms);
 	do_reads(ms, &reads);
 	do_writes(ms, &writes);
+
+	/* let the log use our thread for a bit */
+	if (log->type->do_work)
+		log->type->do_work(log);
+
 	blk_run_queues();
 }
 
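For illustration only (this sketch is not part of the patch): one way a log type
might implement the new asynchronous mark_region()/do_work() contract declared
in dm-log.h above.  Everything named sketch_* is hypothetical, the
kmalloc(GFP_ATOMIC) stands in for what would realistically be a mempool, and a
persistent log would commit its bitmap to disk before making the callbacks.

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>

#include "dm-log.h"

/* a mark request queued between calls to do_work() */
struct sketch_mark {
	struct list_head list;
	struct region_list *rl;
	void (*callback)(int error, struct region_list *rl);
};

struct sketch_log {
	spinlock_t lock;		/* protects pending */
	struct list_head pending;	/* both fields initialised in the
					 * log's ctr (not shown) */
};

/* hypothetical example, not part of the patch */
static void sketch_mark_region(struct dirty_log *log, struct region_list *rl,
			       void (*callback)(int, struct region_list *))
{
	struct sketch_log *sl = (struct sketch_log *) log->context;
	struct sketch_mark *m = kmalloc(sizeof(*m), GFP_ATOMIC);

	if (!m) {
		/* nowhere to queue the request, so fail it at once;
		 * the interface only promises an eventual callback */
		callback(-ENOMEM, rl);
		return;
	}

	m->rl = rl;
	m->callback = callback;

	spin_lock(&sl->lock);
	list_add_tail(&m->list, &sl->pending);
	spin_unlock(&sl->lock);
}

static void sketch_do_work(struct dirty_log *log)
{
	struct sketch_log *sl = (struct sketch_log *) log->context;
	struct sketch_mark *m, *tmp;
	struct list_head marks;

	INIT_LIST_HEAD(&marks);

	spin_lock(&sl->lock);
	list_splice_init(&sl->pending, &marks);
	spin_unlock(&sl->lock);

	/*
	 * A persistent log would write its dirty bitmap here,
	 * folding every mark gathered since the last call into
	 * a single commit, and only then make the callbacks.
	 */
	list_for_each_entry_safe (m, tmp, &marks, list) {
		m->callback(0, m->rl);
		kfree(m);
	}
}

Because do_work() runs on the client's own thread (do_mirror() above), marks
accumulated between calls batch into one commit for free, which is the point of
pairing the callback with the do_work() hook.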