From: Joe Thornber <ejt@redhat.com>

Signed-off-by: Joe Thornber <ejt@redhat.com>

FIXME Needs a patch header

---
 drivers/md/dm-thin-metadata.c                      |   25 ++++
 drivers/md/dm-thin-metadata.h                      |    7 +
 drivers/md/dm-thin.c                               |   69 +++++++++++-
 drivers/md/persistent-data/dm-space-map-disk.c     |    3 
 drivers/md/persistent-data/dm-space-map-metadata.c |  118 +++++++++++++++++++--
 drivers/md/persistent-data/dm-space-map.h          |   22 +++
 6 files changed, 232 insertions(+), 12 deletions(-)

Index: linux/drivers/md/dm-thin-metadata.c
===================================================================
--- linux.orig/drivers/md/dm-thin-metadata.c
+++ linux/drivers/md/dm-thin-metadata.c
@@ -1676,6 +1676,17 @@ int dm_pool_resize_data_dev(struct dm_po
 	return r;
 }
 
+int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
+{
+	int r;
+
+	down_write(&pmd->root_lock);
+	r = pmd->fail_io ? -EINVAL : __resize_space_map(pmd->metadata_sm, new_count);
+	up_write(&pmd->root_lock);
+
+	return r;
+}
+
 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
 {
 	down_write(&pmd->root_lock);
@@ -1683,3 +1694,17 @@ void dm_pool_metadata_read_only(struct d
 	dm_bm_set_read_only(pmd->bm);
 	up_write(&pmd->root_lock);
 }
+
+int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
+					dm_block_t threshold,
+					dm_sm_threshold_fn fn,
+					void *context)
+{
+	int r;
+
+	down_write(&pmd->root_lock);
+	r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
+	up_write(&pmd->root_lock);
+
+	return r;
+}
Index: linux/drivers/md/dm-thin-metadata.h
===================================================================
--- linux.orig/drivers/md/dm-thin-metadata.h
+++ linux/drivers/md/dm-thin-metadata.h
@@ -8,6 +8,7 @@
 #define DM_THIN_METADATA_H
 
 #include "persistent-data/dm-block-manager.h"
+#include "persistent-data/dm-space-map.h"
 
 #define THIN_METADATA_BLOCK_SIZE 4096
 
@@ -185,6 +186,7 @@ int dm_pool_get_data_dev_size(struct dm_
  * blocks would be lost.
  */
 int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
+int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
 
 /*
  * Flicks the underlying block manager into read only mode, so you know
@@ -192,6 +194,11 @@ int dm_pool_resize_data_dev(struct dm_po
  */
 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd);
 
+int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
+					dm_block_t threshold,
+					dm_sm_threshold_fn fn,
+					void *context);
+
 /*----------------------------------------------------------------*/
 
 #endif
Index: linux/drivers/md/dm-thin.c
===================================================================
--- linux.orig/drivers/md/dm-thin.c
+++ linux/drivers/md/dm-thin.c
@@ -42,6 +42,14 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_P
 #define MAX_DEV_ID ((1 << 24) - 1)
 
 /*
+ * Metadata low water mark.
+ *
+ * If the free blocks in the metadata device pass this threshold an event
+ * will be generated.
+ */
+#define METADATA_LOW_WATER_MARK 64
+
+/*
  * How do we handle breaking sharing of data blocks?
  * =================================================
  *
@@ -1909,6 +1917,16 @@ static int parse_pool_features(struct dm
 	return r;
 }
 
+static void metadata_low_callback(void *context)
+{
+	struct pool *pool = context;
+
+	DMWARN("%s: reached low water mark for metadata device: sending event.",
+	       dm_device_name(pool->pool_md));
+
+	dm_table_event(pool->ti->table);
+}
+
 static dm_block_t get_metadata_dev_size(struct block_device *bdev)
 {
 	sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
@@ -1964,7 +1982,6 @@ static int pool_ctr(struct dm_target *ti
 		ti->error = "Error opening metadata block device";
 		goto out_unlock;
 	}
-
 	(void) get_metadata_dev_size(metadata_dev->bdev);
 
 	r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
@@ -2049,6 +2066,13 @@ static int pool_ctr(struct dm_target *ti
 	}
 	ti->private = pt;
 
+	r = dm_pool_register_metadata_threshold(pt->pool->pmd,
+						METADATA_LOW_WATER_MARK,
+						metadata_low_callback,
+						pool);
+	if (r)
+		goto out_free_pt;
+
 	pt->callbacks.congested_fn = pool_is_congested;
 	dm_table_add_target_callbacks(ti->table, &pt->callbacks);
 
@@ -2124,6 +2148,41 @@ static int maybe_resize_data_dev(struct 
 	return 0;
 }
 
+static int maybe_resize_metadata_dev(struct dm_target *ti, int *need_commit)
+{
+	int r;
+	struct pool_c *pt = ti->private;
+	struct pool *pool = pt->pool;
+	dm_block_t metadata_dev_size, sb_metadata_dev_size;
+
+	metadata_dev_size = get_metadata_dev_size(pool->md_dev);
+
+	r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
+	if (r) {
+		DMERR("failed to retrieve data device size");
+		return r;
+	}
+
+	if (metadata_dev_size < sb_metadata_dev_size) {
+		DMERR("metadata device too small, is %llu blocks (expected %llu)",
+		      metadata_dev_size, sb_metadata_dev_size);
+		return -EINVAL;
+
+	} else if (metadata_dev_size > sb_metadata_dev_size) {
+		r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
+		if (r) {
+			DMERR("failed to resize metadata device");
+			/* FIXME Stricter than necessary: Rollback transaction instead here */
+			set_pool_mode(pool, PM_READ_ONLY);
+			return r;
+		}
+
+		*need_commit = 1;
+	}
+
+	return 0;
+}
+
 /*
  * Retrieves the number of blocks of the data device from
  * the superblock and compares it to the actual device size,
@@ -2137,7 +2196,7 @@ static int maybe_resize_data_dev(struct 
  */
 static int pool_preresume(struct dm_target *ti)
 {
-	int r, need_commit1 = 0;
+	int r, need_commit1 = 0, need_commit2 = 0;
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
 
@@ -2152,7 +2211,11 @@ static int pool_preresume(struct dm_targ
 	if (r)
 		return r;
 
-	if (need_commit1)
+	r = maybe_resize_metadata_dev(ti, &need_commit2);
+	if (r)
+		return r;
+
+	if (need_commit1 || need_commit2)
 		(void) commit_or_fallback(pool);
 
 	return 0;
Index: linux/drivers/md/persistent-data/dm-space-map-disk.c
===================================================================
--- linux.orig/drivers/md/persistent-data/dm-space-map-disk.c
+++ linux/drivers/md/persistent-data/dm-space-map-disk.c
@@ -248,7 +248,8 @@ static struct dm_space_map ops = {
 	.new_block = sm_disk_new_block,
 	.commit = sm_disk_commit,
 	.root_size = sm_disk_root_size,
-	.copy_root = sm_disk_copy_root
+	.copy_root = sm_disk_copy_root,
+	.register_threshold_callback = NULL
 };
 
 struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
Index: linux/drivers/md/persistent-data/dm-space-map-metadata.c
===================================================================
--- linux.orig/drivers/md/persistent-data/dm-space-map-metadata.c
+++ linux/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -17,6 +17,56 @@
 /*----------------------------------------------------------------*/
 
 /*
+ * An edge triggered threshold.
+ */
+struct threshold {
+	bool threshold_set;
+	bool value_set;
+	dm_block_t threshold;
+	dm_block_t current_value;
+	dm_sm_threshold_fn fn;
+	void *context;
+};
+
+static void threshold_init(struct threshold *t)
+{
+	t->threshold_set = false;
+	t->value_set = false;
+}
+
+static void set_threshold(struct threshold *t, dm_block_t value,
+			  dm_sm_threshold_fn fn, void *context)
+{
+	t->threshold_set = true;
+	t->threshold = value;
+	t->fn = fn;
+	t->context = context;
+}
+
+static bool below_threshold(struct threshold *t,
+			    dm_block_t value)
+{
+	return t->threshold_set && value <= t->threshold;
+}
+
+static bool threshold_already_triggered(struct threshold *t)
+{
+	return t->value_set && below_threshold(t, t->current_value);
+}
+
+static void check_threshold(struct threshold *t, dm_block_t value)
+{
+	if (below_threshold(t, value) &&
+	    !threshold_already_triggered(t))
+		t->fn(t->context);
+
+	t->value_set = true;
+	t->current_value = value;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
  * Space map interface.
  *
  * The low level disk format is written using the standard btree and
@@ -54,6 +104,8 @@ struct sm_metadata {
 	unsigned allocated_this_transaction;
 	unsigned nr_uncommitted;
 	struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
+
+	struct threshold threshold;
 };
 
 static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
@@ -137,6 +189,8 @@ static int recursing(struct sm_metadata 
 	return smm->recursion_count;
 }
 
+/*----------------------------------------------------------------*/
+
 static void sm_metadata_destroy(struct dm_space_map *sm)
 {
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
@@ -144,12 +198,6 @@ static void sm_metadata_destroy(struct d
 	kfree(smm);
 }
 
-static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
-{
-	DMERR("doesn't support extend");
-	return -EINVAL;
-}
-
 static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
 {
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
@@ -335,9 +383,19 @@ static int sm_metadata_new_block_(struct
 
 static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
 {
+	dm_block_t count;
+	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
 	int r = sm_metadata_new_block_(sm, b);
 	if (r)
 		DMERR("unable to allocate new metadata block");
+
+	r = sm_metadata_get_nr_free(sm, &count);
+	if (r)
+		DMERR("couldn't get free block count");
+
+	check_threshold(&smm->threshold, count);
+
 	return r;
 }
 
@@ -357,6 +415,17 @@ static int sm_metadata_commit(struct dm_
 	return 0;
 }
 
+static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
+						   dm_block_t threshold,
+						   dm_sm_threshold_fn fn,
+						   void *context)
+{
+	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+	set_threshold(&smm->threshold, threshold, fn, context);
+	return 0;
+}
+
 static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
 {
 	*result = sizeof(struct disk_sm_root);
@@ -382,6 +451,8 @@ static int sm_metadata_copy_root(struct 
 	return 0;
 }
 
+static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
+
 static struct dm_space_map ops = {
 	.destroy = sm_metadata_destroy,
 	.extend = sm_metadata_extend,
@@ -395,7 +466,8 @@ static struct dm_space_map ops = {
 	.new_block = sm_metadata_new_block,
 	.commit = sm_metadata_commit,
 	.root_size = sm_metadata_root_size,
-	.copy_root = sm_metadata_copy_root
+	.copy_root = sm_metadata_copy_root,
+	.register_threshold_callback = sm_metadata_register_threshold_callback
 };
 
 /*----------------------------------------------------------------*/
@@ -517,11 +589,39 @@ static struct dm_space_map bootstrap_ops
 	.new_block = sm_bootstrap_new_block,
 	.commit = sm_bootstrap_commit,
 	.root_size = sm_bootstrap_root_size,
-	.copy_root = sm_bootstrap_copy_root
+	.copy_root = sm_bootstrap_copy_root,
+	.register_threshold_callback = NULL
 };
 
 /*----------------------------------------------------------------*/
 
+static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+	int r, i;
+	enum allocation_event ev;
+	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+	dm_block_t old_len = smm->ll.nr_blocks;
+
+	/*
+	 * We flick into a mode where all blocks get allocated in the new
+	 * area, ...
+	 */
+	smm->begin = old_len;
+	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
+
+	/* ... extend, ... */
+	r = sm_ll_extend(&smm->ll, extra_blocks);
+
+	/* ... then switch back to normal behaviour. */
+	memcpy(&smm->sm, &ops, sizeof(smm->sm));
+	for (i = old_len; !r && i < smm->begin; i++)
+		r = sm_ll_inc(&smm->ll, i, &ev);
+
+	return r;
+}
+
+/*----------------------------------------------------------------*/
+
 struct dm_space_map *dm_sm_metadata_init(void)
 {
 	struct sm_metadata *smm;
@@ -549,6 +649,7 @@ int dm_sm_metadata_create(struct dm_spac
 	smm->recursion_count = 0;
 	smm->allocated_this_transaction = 0;
 	smm->nr_uncommitted = 0;
+	threshold_init(&smm->threshold);
 
 	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
 
@@ -590,6 +691,7 @@ int dm_sm_metadata_open(struct dm_space_
 	smm->recursion_count = 0;
 	smm->allocated_this_transaction = 0;
 	smm->nr_uncommitted = 0;
+	threshold_init(&smm->threshold);
 
 	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
 	return 0;
Index: linux/drivers/md/persistent-data/dm-space-map.h
===================================================================
--- linux.orig/drivers/md/persistent-data/dm-space-map.h
+++ linux/drivers/md/persistent-data/dm-space-map.h
@@ -9,6 +9,8 @@
 
 #include "dm-block-manager.h"
 
+typedef void (*dm_sm_threshold_fn)(void *context);
+
 /*
  * struct dm_space_map keeps a record of how many times each block in a device
  * is referenced.  It needs to be fixed on disk as part of the transaction.
@@ -53,6 +55,15 @@ struct dm_space_map {
 	int (*new_block)(struct dm_space_map *sm, dm_block_t *b);
 
 	/*
+	 * You can register 1 threshold callback.  This is edge triggered
+	 * when the free space in the space map drops below the threshold.
+	 */
+	int (*register_threshold_callback)(struct dm_space_map *sm,
+					   dm_block_t threshold,
+					   dm_sm_threshold_fn fn,
+					   void *context);
+
+	/*
 	 * The root contains all the information needed to fix the space map.
 	 * Generally this info is small, so squirrel it away in a disk block
 	 * along with other info.
@@ -121,6 +132,17 @@ static inline int dm_sm_new_block(struct
 	return sm->new_block(sm, b);
 }
 
+static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm,
+						    dm_block_t threshold,
+						    dm_sm_threshold_fn fn,
+						    void *context)
+{
+	if (sm->register_threshold_callback)
+		return sm->register_threshold_callback(sm, threshold, fn, context);
+
+	return -EINVAL;
+}
+
 static inline int dm_sm_root_size(struct dm_space_map *sm, size_t *result)
 {
 	return sm->root_size(sm, result);
