dm thin: miscellaneous cleanups

Replace magic numbers with named constants (ENDIO_HOOK_POOL_SIZE,
DEFERRED_SET_SIZE, MAPPING_POOL_SIZE, PRISON_CELLS), rename the bio
completion callbacks from *_complete to *_endio, introduce the
save_and_set_bio_endio()/restore_bio_endio() macros for saving and
restoring a bio's bi_end_io/bi_private, factor process_prepared_mapping()
out of process_prepared_mappings(), prefix the workqueue names with
"dm-", and tidy comments and whitespace.
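
To illustrate how the new endio save/restore macros are meant to pair up,
here is a minimal sketch.  The macros, struct new_mapping, bio->bi_private
and bio_endio() are as used in the patch below; example_endio() and
example_hook() are hypothetical stand-ins for illustration only.

	/*
	 * Completion side: recover the mapping from bi_private, put the
	 * bio's original completion hooks back, then complete the bio.
	 */
	static void example_endio(struct bio *bio, int err)
	{
		struct new_mapping *m = bio->bi_private;

		restore_bio_endio(m, bio);
		bio_endio(bio, err);
	}

	/*
	 * Submission side: remember the bio's original bi_end_io and
	 * bi_private, then redirect completion to our handler with the
	 * mapping as context before issuing the bio.
	 */
	static void example_hook(struct new_mapping *m, struct bio *bio)
	{
		save_and_set_bio_endio(m, bio, example_endio);
		/* ... remap and submit the bio ... */
	}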
---
 drivers/md/dm-thin.c |  240 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 142 insertions(+), 98 deletions(-)

Index: linux-3.0-rc7/drivers/md/dm-thin.c
===================================================================
--- linux-3.0-rc7.orig/drivers/md/dm-thin.c
+++ linux-3.0-rc7/drivers/md/dm-thin.c
@@ -19,6 +19,14 @@
 #define	DM_MSG_PREFIX	"thin"
 
 /*
+ * Tunable constants
+ */
+#define ENDIO_HOOK_POOL_SIZE 10240
+#define DEFERRED_SET_SIZE 64
+#define MAPPING_POOL_SIZE 1024
+#define PRISON_CELLS 1024
+
+/*
  * How do we handle breaking sharing of data blocks?
  * =================================================
  *
@@ -112,8 +120,10 @@ struct bio_prison {
 static uint32_t calc_nr_buckets(unsigned nr_cells)
 {
 	uint32_t n = 128;
+
 	nr_cells /= 4;
 	nr_cells = min(nr_cells, 8192u);
+
 	while (n < nr_cells)
 		n <<= 1;
 
@@ -126,11 +136,12 @@ static uint32_t calc_nr_buckets(unsigned
  */
 static struct bio_prison *prison_create(unsigned nr_cells)
 {
-	int i;
+	unsigned i;
 	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
 	size_t len = sizeof(struct bio_prison) +
 		(sizeof(struct hlist_head) * nr_buckets);
 	struct bio_prison *prison = kmalloc(len, GFP_KERNEL);
+
 	if (!prison)
 		return NULL;
 
@@ -154,8 +165,9 @@ static void prison_destroy(struct bio_pr
 
 static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key)
 {
-	const unsigned BIG_PRIME = 4294967291UL;
+	const unsigned long BIG_PRIME = 4294967291UL;
 	uint64_t hash = key->block * BIG_PRIME;
+
 	return (uint32_t) (hash & prison->hash_mask);
 }
 
@@ -176,8 +188,8 @@ static struct cell *__search_bucket(stru
  * This may block if a new cell needs allocating.  You must ensure that
  * cells will be unlocked even if the calling thread is blocked.
  *
- * returns the number of entries in the cell prior to the new addition. or
- * < 0 on failure.
+ * Returns the number of entries in the cell prior to the new addition
+ * or < 0 on failure.
  */
 static int bio_detain(struct bio_prison *prison, struct cell_key *key,
 		      struct bio *inmate, struct cell **ref)
@@ -193,14 +205,16 @@ static int bio_detain(struct bio_prison 
 	cell = __search_bucket(prison->cells + hash, key);
 
 	if (!cell) {
-		/* allocate a new cell */
+		/*
+		 * Allocate a new cell
+		 */
 		spin_unlock_irqrestore(&prison->lock, flags);
 		cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
 		spin_lock_irqsave(&prison->lock, flags);
 
 		/*
 		 * We've been unlocked, so we have to double check that
-		 * nobody else has inserted this cell in the mean time.
+		 * nobody else has inserted this cell in the meantime.
 		 */
 		cell = __search_bucket(prison->cells + hash, key);
 
@@ -224,6 +238,7 @@ static int bio_detain(struct bio_prison 
 		mempool_free(cell2, prison->cell_pool);
 
 	*ref = cell;
+
 	return r;
 }
 
@@ -250,16 +265,22 @@ static int bio_detain_if_occupied(struct
 	spin_unlock_irqrestore(&prison->lock, flags);
 
 	*ref = cell;
+
 	return r;
 }
 
-/* @inmates must have been initialised prior to this call */
+/*
+ * @inmates must have been initialised prior to this call
+ */
 static void __cell_release(struct cell *cell, struct bio_list *inmates)
 {
 	struct bio_prison *prison = cell->prison;
+
 	hlist_del(&cell->list);
+
 	if (inmates)
 		bio_list_merge(inmates, &cell->bios);
+
 	mempool_free(cell, prison->cell_pool);
 }
 
@@ -298,7 +319,6 @@ static void cell_error(struct cell *cell
  * until these prior reads have completed.  Otherwise the insertion of the
  * new mapping could free the old block that the read bios are mapped to.
  */
-#define DEFERRED_SET_SIZE 64
 
 struct deferred_set;
 struct deferred_entry {
@@ -369,7 +389,9 @@ static void ds_dec(struct deferred_entry
 	spin_unlock_irqrestore(&entry->ds->lock, flags);
 }
 
-/* 1 if deferred, 0 if no pending items to delay job */
+/*
+ * Returns 1 if deferred or 0 if no pending items to delay job.
+ */
 static int ds_add_work(struct deferred_set *ds, struct list_head *work)
 {
 	int r = 1;
@@ -423,7 +445,7 @@ static void build_virtual_key(struct dm_
  */
 struct pool {
 	struct hlist_node hlist;
-	struct dm_target *ti;	/* only set if a pool target is bound */
+	struct dm_target *ti;	/* Only set if a pool target is bound */
 
 	struct mapped_device *pool_md;
 	struct dm_thin_metadata *tmd;
@@ -446,7 +468,7 @@ struct pool {
 	struct bio_list deferred_bios;
 	struct list_head prepared_mappings;
 
-	int triggered;		/* a dm event has been sent */
+	int triggered;		/* A dm event has been sent */
 	struct bio_list retry_list;
 
 	struct deferred_set ds;	/* FIXME: move to thin_c */
@@ -482,15 +504,17 @@ struct thin_c {
 	struct dm_thin_device *td;
 };
 
+/* FIXME: Can cells and new_mappings be combined? */
+
 struct endio_hook {
 	struct thin_c *tc;
-	bio_end_io_t *bi_end_io;
-	void *bi_private;
+
+	bio_end_io_t *saved_bi_end_io;
+	void *saved_bi_private;
+
 	struct deferred_entry *entry;
 };
 
-/* FIXME: can cells and new_mappings be combined? */
-
 struct new_mapping {
 	struct list_head list;
 
@@ -509,18 +533,32 @@ struct new_mapping {
 	 * the bio twice.
 	 */
 	struct bio *bio;
-	bio_end_io_t *bi_end_io;
-	void *bi_private;
+	bio_end_io_t *saved_bi_end_io;
+	void *saved_bi_private;
 };
 
+#define save_and_set_bio_endio(m, bio, endio)		\
+do {							\
+	(m)->saved_bi_end_io = (bio)->bi_end_io;	\
+	(m)->saved_bi_private = (bio)->bi_private;	\
+	(bio)->bi_end_io = (endio);			\
+	(bio)->bi_private = (m);			\
+} while (0)
+
+#define restore_bio_endio(m, bio)			\
+do {							\
+	(bio)->bi_end_io = (m)->saved_bi_end_io;	\
+	(bio)->bi_private = (m)->saved_bi_private;	\
+} while (0)
+
 /*----------------------------------------------------------------*/
 
 /*
  * A global table that uses a struct mapped_device as a key.
  */
 #define TABLE_SIZE 32
-#define TABLE_PRIME 27 /* Largest prime smaller than table size. */
-#define	TABLE_SHIFT 5  /* Shift fitting prime. */
+#define TABLE_PRIME 27	/* Largest prime smaller than table size. */
+#define	TABLE_SHIFT 5	/* Shift fitting prime. */
 
 static struct dm_thin_pool_table {
 	spinlock_t lock;
@@ -576,11 +614,15 @@ static struct pool *pool_table_lookup(st
 /*----------------------------------------------------------------*/
 
 /*
- * We need to maintain an association between a bio and a thin target. To
- * save lookups in an auxillary table, or wrapping bios in objects from a
- * mempool we hide this value in the bio->bi_bdev field, which we know is
- * not used while the bio is being processed.
+ * We need to maintain an association between a bio and a thin target
+ * when deferring a bio and handing it from the individual thin target to the
+ * workqueue, which is shared across all thin targets.
+ *
+ * To avoid another mempool allocation or lookups in an auxiliary table,
+ * we borrow the bi_bdev field for this purpose, as we know it is
+ * not used while the bio is being processed and we know the value it holds.
  */
+/* FIXME: Can this use map_context instead? */
 static void set_tc(struct bio *bio, struct thin_c *tc)
 {
 	bio->bi_bdev = (struct block_device *)tc;
@@ -650,7 +692,7 @@ static void __maybe_add_mapping(struct n
 static void copy_complete(int read_err, unsigned long write_err, void *context)
 {
 	unsigned long flags;
-	struct new_mapping *m = (struct new_mapping *)context;
+	struct new_mapping *m = context;
 	struct pool *pool = m->tc->pool;
 
 	m->err = read_err || write_err ? -EIO : 0;
@@ -661,10 +703,10 @@ static void copy_complete(int read_err, 
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
-static void overwrite_complete(struct bio *bio, int err)
+static void overwrite_endio(struct bio *bio, int err)
 {
 	unsigned long flags;
-	struct new_mapping *m = (struct new_mapping *)bio->bi_private;
+	struct new_mapping *m = bio->bi_private;
 	struct pool *pool = m->tc->pool;
 
 	m->err = err;
@@ -675,29 +717,29 @@ static void overwrite_complete(struct bi
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
-static void shared_read_complete(struct bio *bio, int err)
+
+static void shared_read_endio(struct bio *bio, int err)
 {
 	struct list_head mappings;
 	struct new_mapping *m, *tmp;
-	struct endio_hook *h = (struct endio_hook *)bio->bi_private;
+	struct endio_hook *endio_hook = bio->bi_private;
 	unsigned long flags;
 
-	bio->bi_end_io = h->bi_end_io;
-	bio->bi_private = h->bi_private;
+	restore_bio_endio(endio_hook, bio);
 	bio_endio(bio, err);
 
 	INIT_LIST_HEAD(&mappings);
-	ds_dec(h->entry, &mappings);
+	ds_dec(endio_hook->entry, &mappings);
 
-	spin_lock_irqsave(&h->tc->pool->lock, flags);
+	spin_lock_irqsave(&endio_hook->tc->pool->lock, flags);
 	list_for_each_entry_safe(m, tmp, &mappings, list) {
 		list_del(&m->list);
 		INIT_LIST_HEAD(&m->list);
 		__maybe_add_mapping(m);
 	}
-	spin_unlock_irqrestore(&h->tc->pool->lock, flags);
+	spin_unlock_irqrestore(&endio_hook->tc->pool->lock, flags);
 
-	mempool_free(h, h->tc->pool->endio_hook_pool);
+	mempool_free(endio_hook, endio_hook->tc->pool->endio_hook_pool);
 }
 
 static int io_covers_block(struct pool *pool, struct bio *bio)
@@ -725,17 +767,15 @@ static void schedule_copy(struct thin_c 
 
 	ds_add_work(&pool->ds, &m->list);
 
+	/*
+	 * If the whole block of data is being overwritten, we can issue the
+	 * bio immediately. Otherwise we use kcopyd to clone the data first.
+	 */
 	if (io_covers_block(pool, bio)) {
-		/* no copy needed, since all data is going to change */
 		m->bio = bio;
-		m->bi_end_io = bio->bi_end_io;
-		m->bi_private = bio->bi_private;
-		bio->bi_end_io = overwrite_complete;
-		bio->bi_private = m;
+		save_and_set_bio_endio(m, bio, overwrite_endio);
 		remap_and_issue(tc, bio, data_dest);
-
 	} else {
-		/* use kcopyd */
 		struct dm_io_region from, to;
 
 		/* IO to pool_dev remaps to the pool target's data_dev */
@@ -773,15 +813,15 @@ static void schedule_zero(struct thin_c 
 	m->err = 0;
 	m->bio = NULL;
 
+	/*
+	 * If the whole block of data is being overwritten or we are not
+	 * zeroing pre-existing data, we can issue the bio immediately.
+	 * Otherwise we use kcopyd to zero the data first.
+	 */
 	if (!pool->zero_new_blocks || io_covers_block(pool, bio)) {
-		/* no copy needed, since all data is going to change */
 		m->bio = bio;
-		m->bi_end_io = bio->bi_end_io;
-		m->bi_private = bio->bi_private;
-		bio->bi_end_io = overwrite_complete;
-		bio->bi_private = m;
+		save_and_set_bio_endio(m, bio, overwrite_endio);
 		remap_and_issue(tc, bio, data_block);
-
 	} else {
 		int r;
 		struct dm_io_region to;
@@ -833,7 +873,6 @@ static void retry_later(struct bio *bio)
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	/* push it onto the retry list */
 	spin_lock_irqsave(&pool->lock, flags);
 	bio_list_add(&pool->retry_list, bio);
 	spin_unlock_irqrestore(&pool->lock, flags);
@@ -914,6 +953,7 @@ static void no_space(struct cell *cell)
 
 	bio_list_init(&bios);
 	cell_release(cell, &bios);
+
 	while ((bio = bio_list_pop(&bios)))
 		retry_later(bio);
 }
@@ -953,6 +993,7 @@ static void process_shared_bio(struct th
 	struct cell *cell;
 	struct cell_key key;
 	struct pool *pool = tc->pool;
+	struct endio_hook *endio_hook;
 
 	build_data_key(tc->td, lookup_result->block, &key);
 	if (bio_detain_if_occupied(pool->prison, &key, bio, &cell))
@@ -961,16 +1002,11 @@ static void process_shared_bio(struct th
 	if (bio_data_dir(bio) == WRITE)
 		break_sharing(tc, bio, block, &key, lookup_result);
 	else {
-		struct endio_hook *h = mempool_alloc(pool->endio_hook_pool,
-						     GFP_NOIO);
+		endio_hook = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
 
-		h->tc = tc;
-		h->bi_end_io = bio->bi_end_io;
-		h->bi_private = bio->bi_private;
-		h->entry = ds_inc(&pool->ds);
-
-		bio->bi_end_io = shared_read_complete;
-		bio->bi_private = h;
+		endio_hook->tc = tc;
+		endio_hook->entry = ds_inc(&pool->ds);
+		save_and_set_bio_endio(endio_hook, bio, shared_read_endio);
 
 		remap_and_issue(tc, bio, lookup_result->block);
 	}
@@ -1058,61 +1094,64 @@ static void process_bios(struct pool *po
 	}
 }
 
-static void process_prepared_mappings(struct pool *pool)
+static void process_prepared_mapping(struct new_mapping *m)
 {
+	struct thin_c *tc = m->tc;
+	struct bio *bio;
 	int r;
+
+	if (m->err) {
+		cell_error(m->cell);
+		return;
+	}
+
+	bio = m->bio;
+	if (bio)
+		restore_bio_endio(m, bio);
+
+	r = dm_thin_metadata_insert(tc->td, m->virt_block, m->data_block);
+	if (r) {
+		DMERR("dm_thin_metadata_insert() failed");
+		cell_error(m->cell);
+		return;
+	}
+
+	if (bio) {
+		cell_remap_and_issue_except(tc, m->cell, m->data_block, bio);
+		bio_endio(bio, 0);
+	} else
+		cell_remap_and_issue(tc, m->cell, m->data_block);
+
+	list_del(&m->list);
+	mempool_free(m, tc->pool->mapping_pool);
+}
+
+static void process_prepared_mappings(struct pool *pool)
+{
 	unsigned long flags;
 	struct list_head maps;
 	struct new_mapping *m, *tmp;
-	struct bio *bio;
 
 	INIT_LIST_HEAD(&maps);
 	spin_lock_irqsave(&pool->lock, flags);
 	list_splice_init(&pool->prepared_mappings, &maps);
 	spin_unlock_irqrestore(&pool->lock, flags);
 
-	list_for_each_entry_safe(m, tmp, &maps, list) {
-		struct thin_c *tc = m->tc;
-
-		if (m->err) {
-			cell_error(m->cell);
-			continue;
-		}
-
-		bio = m->bio;
-		if (bio) {
-			bio->bi_end_io = m->bi_end_io;
-			bio->bi_private = m->bi_private;
-		}
-
-		r = dm_thin_metadata_insert(tc->td, m->virt_block,
-					    m->data_block);
-		if (r) {
-			DMERR("dm_thin_metadata_insert() failed");
-			cell_error(m->cell);
-		} else {
-			if (m->bio) {
-				cell_remap_and_issue_except(tc, m->cell,
-							    m->data_block, bio);
-				bio_endio(bio, 0);
-			} else
-				cell_remap_and_issue(tc, m->cell, m->data_block);
-
-			list_del(&m->list);
-			mempool_free(m, pool->mapping_pool);
-		}
-	}
+	list_for_each_entry_safe(m, tmp, &maps, list)
+		process_prepared_mapping(m);
 }
 
 static void do_producer(struct work_struct *ws)
 {
 	struct pool *pool = container_of(ws, struct pool, producer);
+
 	process_bios(pool);
 }
 
 static void do_consumer(struct work_struct *ws)
 {
 	struct pool *pool = container_of(ws, struct pool, consumer);
+
 	process_prepared_mappings(pool);
 }
 
@@ -1144,7 +1183,7 @@ static int bio_map(struct dm_target *ti,
 	struct dm_thin_lookup_result result;
 
 	/*
-	 * XXX(hch): in theory higher level code should prevent this
+	 * FIXME(hch): in theory higher level code should prevent this
 	 * from happening, not sure why we ever get here.
 	 */
 	if ((bio->bi_rw & REQ_DISCARD) &&
@@ -1161,6 +1200,7 @@ static int bio_map(struct dm_target *ti,
 	}
 
 	r = dm_thin_metadata_lookup(td, block, 0, &result);
+
 	switch (r) {
 	case 0:
 		if (unlikely(result.shared)) {
@@ -1241,6 +1281,7 @@ static int bind_control_target(struct po
 					 pool->sectors_per_block);
 	pool->zero_new_blocks = pt->zero_new_blocks;
 	dm_thin_metadata_rebind_block_device(pool->tmd, pt->metadata_dev->bdev);
+
 	return 0;
 }
 
@@ -1300,7 +1341,7 @@ static struct pool *pool_create(struct b
 	pool->offset_mask = block_size - 1;
 	pool->low_water_mark = 0;
 	pool->zero_new_blocks = 1;
-	pool->prison = prison_create(1024); /* FIXME: magic number */
+	pool->prison = prison_create(PRISON_CELLS);
 	if (!pool->prison) {
 		*error = "Error creating pool's bio prison";
 		err_p = ERR_PTR(-ENOMEM);
@@ -1318,7 +1359,7 @@ static struct pool *pool_create(struct b
 	/* Create singlethreaded workqueue that will service all devices
 	 * that use this metadata.
 	 */
-	pool->producer_wq = alloc_ordered_workqueue(DM_MSG_PREFIX "-producer",
+	pool->producer_wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX "-producer",
 						    WQ_MEM_RECLAIM);
 	if (!pool->producer_wq) {
 		*error = "Error creating pool's producer workqueue";
@@ -1326,7 +1367,7 @@ static struct pool *pool_create(struct b
 		goto bad_producer_wq;
 	}
 
-	pool->consumer_wq = alloc_ordered_workqueue(DM_MSG_PREFIX "-consumer",
+	pool->consumer_wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX "-consumer",
 						    WQ_MEM_RECLAIM);
 	if (!pool->consumer_wq) {
 		*error = "Error creating pool's consumer workqueue";
@@ -1342,17 +1383,17 @@ static struct pool *pool_create(struct b
 	pool->triggered = 0;
 	bio_list_init(&pool->retry_list);
 	ds_init(&pool->ds);
-	/* FIXME: magic number */
+
 	pool->mapping_pool =
-		mempool_create_kmalloc_pool(1024, sizeof(struct new_mapping));
+		mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping));
 	if (!pool->mapping_pool) {
 		*error = "Error creating pool's mapping mempool";
 		err_p = ERR_PTR(-ENOMEM);
 		goto bad_mapping_pool;
 	}
-	/* FIXME: magic number */
+
 	pool->endio_hook_pool =
-		mempool_create_kmalloc_pool(10240, sizeof(struct endio_hook));
+		mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook));
 	if (!pool->endio_hook_pool) {
 		*error = "Error creating pool's endio_hook mempool";
 		err_p = ERR_PTR(-ENOMEM);
@@ -1878,6 +1919,7 @@ static int pool_iterate_devices(struct d
 				iterate_devices_callout_fn fn, void *data)
 {
 	struct pool_c *pt = ti->private;
+
 	return fn(ti, pt->data_dev, 0, ti->len, data);
 }
 
@@ -1891,6 +1933,7 @@ static int pool_merge(struct dm_target *
 		return max_size;
 
 	bvm->bi_bdev = pt->data_dev->bdev;
+
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
@@ -2023,6 +2066,7 @@ static int thin_map(struct dm_target *ti
 		    union map_info *map_context)
 {
 	bio->bi_sector -= ti->begin;
+
 	return bio_map(ti, bio);
 }
 
