From: Milan Broz <mbroz@redhat.com>

Provided sector_t is 64 bits, reduce the in-memory footprint of the
snapshot exception table by the simple method of using unused bits of
the chunk number to combine adjacent entries.

> Because insert_exception now also has to search the list,
> there is an optimization that assumes
> most writes go from the beginning to the end of the device,
> so the (sorted) list is searched backwards.

[still to edit]

[64 bit check not yet implemented]

Signed-off-by: Milan Broz <mbroz@redhat.com>
---
 drivers/md/dm-snap.c |   75 +++++++++++++++++++++++++++++++++++++++++----------
 drivers/md/dm-snap.h |   34 +++++++++++++++++++++--
 2 files changed, 93 insertions(+), 16 deletions(-)

Index: linux-2.6.24-rc2/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.24-rc2.orig/drivers/md/dm-snap.c	2007-11-21 16:50:24.000000000 +0000
+++ linux-2.6.24-rc2/drivers/md/dm-snap.c	2007-11-21 16:51:11.000000000 +0000
@@ -214,10 +214,13 @@ static void unregister_snapshot(struct d
 /*
  * Implementation of the exception hash tables.
  */
-static int init_exception_table(struct exception_table *et, uint32_t size)
+
+static int init_exception_table(struct exception_table *et, uint32_t size,
+				unsigned hash_shift)
 {
 	unsigned int i;
 
+	et->hash_shift = hash_shift;
 	et->hash_mask = size - 1;
 	et->table = dm_vcalloc(size, sizeof(struct list_head));
 	if (!et->table)
@@ -248,7 +251,8 @@ static void exit_exception_table(struct 
 
 static uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
 {
-	return chunk & et->hash_mask;
+	/* drop the low hash_shift bits so adjacent chunks share one bucket */
+	return (chunk >> et->hash_shift) & et->hash_mask;
 }
 
 static void insert_exception(struct exception_table *eh,
@@ -275,7 +279,8 @@ static struct dm_snap_exception *lookup_
 
 	slot = &et->table[exception_hash(et, chunk)];
 	list_for_each_entry (e, slot, hash_list)
-		if (e->old_chunk == chunk)
+		if (e->old_chunk <= chunk &&
+		    (e->old_chunk + chunk_rep(e)) >= chunk)
 			return e;
 
 	return NULL;
@@ -307,6 +312,46 @@ static void free_pending_exception(struc
 	mempool_free(pe, pending_pool);
 }
 
+/* Add new_e to s->complete, merging it into an adjacent run when possible. */
+static void insert_completed_exception(struct dm_snapshot *s,
+				      struct dm_snap_exception *new_e)
+{
+	struct exception_table *eh = &s->complete;
+	struct list_head *l = &eh->table[exception_hash(eh, new_e->old_chunk)];
+	struct dm_snap_exception *e = NULL;
+	chunk_t offset_new, offset_old;
+
+	/* writes mostly ascend, so scan the sorted list from its tail */
+	list_for_each_entry_reverse(e, l, hash_list) {
+		offset_old = e->old_chunk + chunk_rep(e);
+		offset_new = chunk_offset(e->new_chunk) + chunk_rep(e);
+
+		/* append: new_e directly follows the last chunk of e */
+		if (new_e->old_chunk == (offset_old + 1) &&
+		    new_e->new_chunk == (offset_new + 1)) {
+			chunk_rep_inc(e);
+			free_exception(new_e);
+			return;
+		}
+
+		/* prepend: new_e directly precedes the first chunk of e */
+		if (new_e->old_chunk == (e->old_chunk - 1) &&
+		    new_e->new_chunk == (chunk_offset(e->new_chunk) - 1)) {
+			chunk_rep_inc(e);
+			e->old_chunk--;
+			e->new_chunk--;
+			free_exception(new_e);
+			return;
+		}
+
+		/* list is sorted by old_chunk */
+		if (new_e->old_chunk > e->old_chunk)
+			break;
+	}
+
+	list_add(&new_e->hash_list, e ? &e->hash_list : l);
+}
+
 int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
 {
 	struct dm_snap_exception *e;
@@ -315,9 +360,10 @@ int dm_add_exception(struct dm_snapshot 
 	if (!e)
 		return -ENOMEM;
 
+	/* the repetition bits are implicitly zero in a fresh exception */
 	e->old_chunk = old;
 	e->new_chunk = new;
-	insert_exception(&s->complete, e);
+	insert_completed_exception(s, e);
 	return 0;
 }
 
@@ -352,7 +398,7 @@ static int init_hash_tables(struct dm_sn
 	hash_size = min(hash_size, max_buckets);
 
 	hash_size = rounddown_pow_of_two(hash_size);
-	if (init_exception_table(&s->complete, hash_size))
+	if (init_exception_table(&s->complete, hash_size, SHIFT_COMPLETED))
 		return -ENOMEM;
 
 	/*
@@ -363,7 +409,7 @@ static int init_hash_tables(struct dm_sn
 	if (hash_size < 64)
 		hash_size = 64;
 
-	if (init_exception_table(&s->pending, hash_size)) {
+	if (init_exception_table(&s->pending, hash_size, 0)) {
 		exit_exception_table(&s->complete, exception_cache);
 		return -ENOMEM;
 	}
@@ -722,7 +768,7 @@ static void pending_complete(struct dm_s
 	 * Add a proper exception, and remove the
 	 * in-flight exception from the list.
 	 */
-	insert_exception(&s->complete, e);
+	insert_completed_exception(s, e);
 
  out:
 	remove_exception(&pe->e);
@@ -856,11 +902,12 @@ __find_pending_exception(struct dm_snaps
 }
 
 static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
-			    struct bio *bio)
+			    struct bio *bio, chunk_t chunk)
 {
 	bio->bi_bdev = s->cow->bdev;
-	bio->bi_sector = chunk_to_sector(s, e->new_chunk) +
-		(bio->bi_sector & s->chunk_mask);
+	bio->bi_sector = chunk_to_sector(s, chunk_offset(e->new_chunk) +
+			(chunk - e->old_chunk)) +
+			(bio->bi_sector & s->chunk_mask);
 }
 
 static int snapshot_map(struct dm_target *ti, struct bio *bio,
@@ -891,7 +938,7 @@ static int snapshot_map(struct dm_target
 	/* If the block is already remapped - use that, else remap it */
 	e = lookup_exception(&s->complete, chunk);
 	if (e) {
-		remap_exception(s, e, bio);
+		remap_exception(s, e, bio, chunk);
 		goto out_unlock;
 	}
 
@@ -908,7 +955,7 @@ static int snapshot_map(struct dm_target
 			goto out_unlock;
 		}
 
-		remap_exception(s, &pe->e, bio);
+		remap_exception(s, &pe->e, bio, chunk);
 		bio_list_add(&pe->snapshot_bios, bio);
 
 		r = DM_MAPIO_SUBMITTED;
@@ -1196,7 +1243,7 @@ static int origin_status(struct dm_targe
 
 static struct target_type origin_target = {
 	.name    = "snapshot-origin",
-	.version = {1, 5, 0},
+	.version = {1, 6, 0},
 	.module  = THIS_MODULE,
 	.ctr     = origin_ctr,
 	.dtr     = origin_dtr,
@@ -1207,7 +1254,7 @@ static struct target_type origin_target 
 
 static struct target_type snapshot_target = {
 	.name    = "snapshot",
-	.version = {1, 5, 0},
+	.version = {1, 6, 0},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
Index: linux-2.6.24-rc2/drivers/md/dm-snap.h
===================================================================
--- linux-2.6.24-rc2.orig/drivers/md/dm-snap.h	2007-11-21 16:50:15.000000000 +0000
+++ linux-2.6.24-rc2/drivers/md/dm-snap.h	2007-11-21 16:51:11.000000000 +0000
@@ -16,6 +16,7 @@
 
 struct exception_table {
 	uint32_t hash_mask;
+	unsigned hash_shift;
 	struct list_head *table;
 };
 
@@ -23,12 +24,14 @@ struct exception_table {
  * The snapshot code deals with largish chunks of the disk at a
  * time. Typically 64k - 256k.
  */
-/* FIXME: can we get away with limiting these to a uint32_t ? */
-typedef sector_t chunk_t;
+typedef uint64_t chunk_t;
 
 /*
  * An exception is used where an old chunk of data has been
  * replaced by a new one.
+ * To save memory for large snapshots, one exception may describe a run of
+ * consecutive chunks: the first chunk number is stored in the low 56 bits
+ * of new_chunk and the repetition counter in its high 8 bits.
  */
 struct dm_snap_exception {
 	struct list_head hash_list;
@@ -38,6 +41,33 @@ struct dm_snap_exception {
 };
 
 /*
+ * log2 of the number of adjacent chunks that may share one exception
+ */
+#define SHIFT_COMPLETED	8
+/*
+ * Functions for consecutive chunk manipulation
+ */
+static inline chunk_t chunk_offset(chunk_t chunk)
+{
+	return chunk & (((chunk_t)1 << 56) - 1);	/* drop the counter byte */
+}
+
+/* Repetition counter of e: the high 8 bits of new_chunk */
+static inline unsigned chunk_rep(struct dm_snap_exception *e)
+{
+	return (unsigned)(e->new_chunk >> 56);
+}
+
+/* Bump the repetition counter of e by one, keeping the low 56 bits intact */
+static inline void chunk_rep_inc(struct dm_snap_exception *e)
+{
+	e->new_chunk = chunk_offset(e->new_chunk) |
+		       ((chunk_t)(chunk_rep(e) + 1) << 56);
+	/* Code must not allow counter overflow */
+	BUG_ON(chunk_rep(e) == 0);
+}
+
+/*
  * Abstraction to handle the meta/layout of exception stores (the
  * COW device).
  */
