From: Mike Snitzer <snitzer@redhat.com>

The discard limits that were established for a thin-pool or thin device
may have been incompatible with the pool's data device.  Fix this by
checking the discard limits of the pool's data device accordingly.  If
an incompatibility is found then the pool's 'discard passdown' feature
is disabled.

Change thin_io_hints to ensure that a thin device always uses the same
queue limits as its pool device.

Introduce no_discard_passdown_flag_used to track whether or not the
table line originally contained the no_discard_passdown flag and use
this directly for table output so that we can set discard_passdown
directly in bind_control_target (called from pool_io_hints) rather
than waiting until we have access to pool->pf in pool_preresume.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Joe Thornber <ejt@redhat.com>

---
drivers/md/dm-thin.c |   89 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 58 insertions(+), 31 deletions(-)

Index: linux/drivers/md/dm-thin.c
===================================================================
--- linux.orig/drivers/md/dm-thin.c
+++ linux/drivers/md/dm-thin.c
@@ -511,7 +511,8 @@ struct pool_features {
 
 	bool zero_new_blocks:1;
 	bool discard_enabled:1;
-	bool discard_passdown:1;
+	bool no_discard_passdown_flag_used:1;	/* Did the table line contain "no_discard_passdown"? */
+	bool discard_passdown:1;	/* Are discards actually passed down? */
 };
 
 struct thin_c;
@@ -1848,21 +1849,36 @@ static bool data_dev_supports_discard(st
 
 /*
  * If discard_passdown was enabled verify that the data device
- * supports discards.  Disable discard_passdown if not; otherwise
- * -EOPNOTSUPP will be returned.
+ * supports discards.  Disable discard_passdown if not.
  */
-static void disable_passdown_if_not_supported(struct pool_c *pt,
-					      struct pool_features *pf)
+static void disable_passdown_if_not_supported(struct pool_c *pt)
 {
+	struct pool *pool = pt->pool;
+	struct block_device *data_bdev = pt->data_dev->bdev;
+	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
+	sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
+	const char *reason = NULL;
 	char buf[BDEVNAME_SIZE];
 
-	if (!pf->discard_passdown || data_dev_supports_discard(pt))
+	if (!pt->pf.discard_passdown)
 		return;
 
-	DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
-	       bdevname(pt->data_dev->bdev, buf));
+	if (!data_dev_supports_discard(pt))
+		reason = "discard unsupported";
+
+	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
+		reason = "max discard sectors smaller than a block";
+
+	else if (data_limits->discard_granularity > block_size)
+		reason = "discard granularity larger than a block";
 
-	pf->discard_passdown = false;
+	else if (block_size & (data_limits->discard_granularity - 1))
+		reason = "discard granularity not a factor of block size";
+
+	if (reason) {
+		DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
+		pt->pf.discard_passdown = false;
+	}
 }
 
 static int bind_control_target(struct pool *pool, struct dm_target *ti)
@@ -1882,7 +1898,6 @@ static int bind_control_target(struct po
 	pool->low_water_blocks = pt->low_water_blocks;
 	pool->pf = pt->pf;
 
-	disable_passdown_if_not_supported(pt, &pool->pf);
 	set_pool_mode(pool, new_mode);
 
 	return 0;
@@ -1904,6 +1919,7 @@ static void pool_features_init(struct po
 	pf->zero_new_blocks = true;
 	pf->discard_enabled = true;
 	pf->discard_passdown = true;
+	pf->no_discard_passdown_flag_used = false;
 }
 
 static void __pool_destroy(struct pool *pool)
@@ -2136,8 +2152,10 @@ static int parse_pool_features(struct dm
 		else if (!strcasecmp(arg_name, "ignore_discard"))
 			pf->discard_enabled = false;
 
-		else if (!strcasecmp(arg_name, "no_discard_passdown"))
+		else if (!strcasecmp(arg_name, "no_discard_passdown")) {
+			pf->no_discard_passdown_flag_used = true;
 			pf->discard_passdown = false;
+		}
 
 		else if (!strcasecmp(arg_name, "read_only"))
 			pf->mode = PM_READ_ONLY;
@@ -2613,7 +2631,7 @@ static void emit_flags(struct pool_featu
 		       unsigned sz, unsigned maxlen)
 {
 	unsigned count = !pf->zero_new_blocks + !pf->discard_enabled +
-		!pf->discard_passdown + (pf->mode == PM_READ_ONLY);
+		pf->no_discard_passdown_flag_used + (pf->mode == PM_READ_ONLY);
 	DMEMIT("%u ", count);
 
 	if (!pf->zero_new_blocks)
@@ -2622,7 +2640,7 @@ static void emit_flags(struct pool_featu
 	if (!pf->discard_enabled)
 		DMEMIT("ignore_discard ");
 
-	if (!pf->discard_passdown)
+	if (pf->no_discard_passdown_flag_used)
 		DMEMIT("no_discard_passdown ");
 
 	if (pf->mode == PM_READ_ONLY)
@@ -2747,19 +2765,18 @@ static int pool_merge(struct dm_target *
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
-static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
+static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
 {
-	/*
-	 * FIXME: these limits may be incompatible with the pool's data device
-	 */
+	struct pool *pool = pt->pool;
+	struct queue_limits *data_limits;
+
 	limits->max_discard_sectors = pool->sectors_per_block;
 
-	/*
-	 * This is just a hint, and not enforced.  We have to cope with
-	 * bios that cover a block partially.  A discard that spans a block
-	 * boundary is not sent to this target.
-	 */
-	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
+	if (pt->pf.discard_passdown) {
+		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
+		limits->discard_granularity = data_limits->discard_granularity;
+	} else
+		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
 }
 
 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2769,15 +2786,25 @@ static void pool_io_hints(struct dm_targ
 
 	blk_limits_io_min(limits, 0);
 	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	if (pool->pf.discard_enabled)
-		set_discard_limits(pool, limits);
+
+	/*
+	 * pt->pf is used here because it reflects the configured features that
+	 * will get transferred to the live pool in bind_control_target()
+	 * called from pool_preresume().
+	 */
+	if (!pt->pf.discard_enabled)
+		return;
+
+	disable_passdown_if_not_supported(pt);
+
+	set_discard_limits(pt, limits);
 }
 
 static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3056,19 +3083,19 @@ static int thin_iterate_devices(struct d
 	return 0;
 }
 
+/*
+ * A thin device always inherits its queue limits from its pool.
+ */
 static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct thin_c *tc = ti->private;
-	struct pool *pool = tc->pool;
 
-	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	set_discard_limits(pool, limits);
+	*limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
 }
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
