<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">From: Mike Snitzer &lt;snitzer@redhat.com&gt;

Permit in-use snapshot exception data to be 'handed over' from one
snapshot instance to another.  This is a prerequisite for patches
that allow the changes made in a snapshot device to be merged back into
its origin device, and it also makes device resizing possible.

The basic call sequence is:

  dmsetup load new_snapshot (referencing the existing in-use cow device)
     - the ctr code detects that the cow is already in use and links the
       two snapshot target instances together
  dmsetup suspend original_snapshot
  dmsetup resume new_snapshot
     - the new_snapshot becomes live, and if anything now tries to access
       the original one it will receive EIO
  dmsetup remove original_snapshot

(There can only be two snapshot targets referencing the same cow device
simultaneously.)

Snapshot locking rules:
0) the snapshot passed to find_snapshot_using_cow() is not locked
1) only the handover-source lock is needed to determine whether a handover
   is required
   - the handover-source lock is the primary lock in the handover code paths
   - the handover-destination lock is only needed before handover_exceptions()
2) the handover-source lock is taken before the handover-destination lock
   - but holding both is only ever needed before calling handover_exceptions()

Signed-off-by: Mike Snitzer &lt;snitzer@redhat.com&gt;

---
 drivers/md/dm-snap.c |  204 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 195 insertions(+), 9 deletions(-)

Index: linux-2.6.32-rc6/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.32-rc6.orig/drivers/md/dm-snap.c
+++ linux-2.6.32-rc6/drivers/md/dm-snap.c
@@ -75,6 +75,15 @@ struct dm_snapshot {
 	/* Whether or not owning mapped_device is suspended */
 	int suspended;
 
+	/*
+	 * Indicate that this snapshot will hand over its exception store
+	 * to another snapshot instance created using the same block
+	 * device as its cow.  It is set in snapshot_ctr and cleared
+	 * in snapshot_resume after the handover or in snapshot_dtr if
+	 * the handover is cancelled.
+	 */
+	int is_handover_source;
+
 	mempool_t *pending_pool;
 
 	atomic_t pending_exceptions_count;
@@ -362,7 +371,7 @@ static void unregister_snapshot(struct d
 	o = __lookup_origin(s-&gt;origin-&gt;bdev);
 
 	list_del(&amp;s-&gt;list);
-	if (list_empty(&amp;o-&gt;snapshots)) {
+	if (o &amp;&amp; list_empty(&amp;o-&gt;snapshots)) {
 		list_del(&amp;o-&gt;hash_list);
 		kfree(o);
 	}
@@ -540,6 +549,42 @@ static int dm_add_exception(void *contex
 	return 0;
 }
 
+/*
+ * Look for an active snapshot on snap's origin that uses the same cow device.
+ * _origins_lock is held for reading while the origin's snapshot list is walked.
+ * The match is returned unlocked and may be snap itself; NULL if none exists.
+ */
+static struct dm_snapshot *find_snapshot_using_cow(struct dm_snapshot *snap)
+{
+	struct dm_snapshot *cur, *found = NULL;
+	struct origin *o;
+	int is_active;
+
+	down_read(&amp;_origins_lock);
+
+	o = __lookup_origin(snap-&gt;origin-&gt;bdev);
+	if (o) {
+		list_for_each_entry(cur, &amp;o-&gt;snapshots, list) {
+			if (!bdev_equal(cur-&gt;cow-&gt;bdev, snap-&gt;cow-&gt;bdev))
+				continue;
+
+			/* Sample the active flag under the snapshot's own lock. */
+			down_write(&amp;cur-&gt;lock);
+			is_active = cur-&gt;active;
+			up_write(&amp;cur-&gt;lock);
+
+			if (is_active) {
+				found = cur;
+				break;
+			}
+		}
+	}
+
+	up_read(&amp;_origins_lock);
+
+	return found;
+}
+
 #define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
 
 /*
@@ -615,7 +660,7 @@ static int init_hash_tables(struct dm_sn
  */
 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
-	struct dm_snapshot *s;
+	struct dm_snapshot *s, *snap_src;
 	int i;
 	int r = -EINVAL;
 	char *origin_path, *cow_path;
@@ -671,7 +716,9 @@ static int snapshot_ctr(struct dm_target
 	s-&gt;active = 0;
 	s-&gt;suspended = 0;
 	atomic_set(&amp;s-&gt;pending_exceptions_count, 0);
+	s-&gt;is_handover_source = 0;
 	init_rwsem(&amp;s-&gt;lock);
+	INIT_LIST_HEAD(&amp;s-&gt;list);
 	spin_lock_init(&amp;s-&gt;pe_lock);
 
 	/* Allocate hash table for COW data */
@@ -706,6 +753,39 @@ static int snapshot_ctr(struct dm_target
 
 	spin_lock_init(&amp;s-&gt;tracked_chunk_lock);
 
+	/*
+	 * Does snapshot need exceptions handing over to it?
+	 */
+	snap_src = find_snapshot_using_cow(s);
+// FIXME What stops snap_src being freed here?  (existing handover, resume new snap, remove old)
+	if (snap_src) {
+		down_write(&amp;snap_src-&gt;lock);
+		if (snap_src-&gt;is_handover_source) {
+			up_write(&amp;snap_src-&gt;lock);
+			ti-&gt;error = "Unable to handover snapshot to "
+				    "two devices at once.";
+			goto bad_load_and_register;
+		}
+		snap_src-&gt;is_handover_source = 1;
+		up_write(&amp;snap_src-&gt;lock);
+	}
+
+	bio_list_init(&amp;s-&gt;queued_bios);
+	INIT_WORK(&amp;s-&gt;queued_bios_work, flush_queued_bios);
+
+	ti-&gt;private = s;
+	ti-&gt;num_flush_requests = 1;
+
+	/*
+	 * Defer snapshot registration until device is resumed.
+	 * Chunk size will be set during the handover. We clear it
+	 * here so we can tell if the handover got cancelled.
+	 */
+	if (snap_src) {
+		s-&gt;store-&gt;chunk_size = 0;
+		return 0;
+	}
+
 	/* Metadata must only be loaded into one table at once */
 	r = s-&gt;store-&gt;type-&gt;read_metadata(s-&gt;store, dm_add_exception,
 					  (void *)s);
@@ -717,13 +797,11 @@ static int snapshot_ctr(struct dm_target
 		DMWARN("Snapshot is marked invalid.");
 	}
 
-	bio_list_init(&amp;s-&gt;queued_bios);
-	INIT_WORK(&amp;s-&gt;queued_bios_work, flush_queued_bios);
-
 	if (!s-&gt;store-&gt;chunk_size) {
 		ti-&gt;error = "Chunk size not set";
 		goto bad_load_and_register;
 	}
+	ti-&gt;split_io = s-&gt;store-&gt;chunk_size;
 
 	/* Add snapshot to the list of snapshots for this origin */
 	/* Exceptions aren't triggered till snapshot_resume() is called */
@@ -733,10 +811,6 @@ static int snapshot_ctr(struct dm_target
 		goto bad_load_and_register;
 	}
 
-	ti-&gt;private = s;
-	ti-&gt;split_io = s-&gt;store-&gt;chunk_size;
-	ti-&gt;num_flush_requests = 1;
-
 	return 0;
 
 bad_load_and_register:
@@ -777,15 +851,70 @@ static void __free_exceptions(struct dm_
 	dm_exception_table_exit(&amp;s-&gt;complete, exception_cache);
 }
 
+/*
+ * Exchange the exception tables and stores of snap_src and snap_dest so that
+ * snap_dest takes over the in-use data; snap_src ends up invalid and inactive.
+ */
+static void handover_exceptions(struct dm_snapshot *snap_src,
+				struct dm_snapshot *snap_dest)
+{
+	struct dm_exception_table dest_table;
+	struct dm_exception_store *dest_store;
+
+	BUG_ON(snap_src-&gt;is_handover_source != 1);
+	BUG_ON(snap_dest-&gt;is_handover_source != 0);
+
+	/* Trade the 'complete' exception tables. */
+	dest_table = snap_dest-&gt;complete;
+	snap_dest-&gt;complete = snap_src-&gt;complete;
+	snap_src-&gt;complete = dest_table;
+
+	/* Trade the stores and point each one back at its new owner. */
+	dest_store = snap_dest-&gt;store;
+	snap_dest-&gt;store = snap_src-&gt;store;
+	snap_src-&gt;store = dest_store;
+	snap_dest-&gt;store-&gt;snap = snap_dest;
+	snap_src-&gt;store-&gt;snap = snap_src;
+
+	/* Keep the target's split_io equal to the inherited chunk size. */
+	if (snap_dest-&gt;ti-&gt;split_io != snap_dest-&gt;store-&gt;chunk_size)
+		snap_dest-&gt;ti-&gt;split_io = snap_dest-&gt;store-&gt;chunk_size;
+
+	/* The destination inherits validity; the source is retired. */
+	snap_dest-&gt;valid = snap_src-&gt;valid;
+	snap_src-&gt;valid = 0;
+	snap_src-&gt;active = 0;
+	snap_src-&gt;is_handover_source = 0;
+}
+
 static void snapshot_dtr(struct dm_target *ti)
 {
 #ifdef CONFIG_DM_DEBUG
 	int i;
 #endif
 	struct dm_snapshot *s = ti-&gt;private;
+	struct dm_snapshot *snap_src = NULL;
 
 	flush_workqueue(ksnapd);
 
+	/* Check if exception handover must be cancelled */
+	snap_src = find_snapshot_using_cow(s);
+	if (snap_src) {
+		down_write(&amp;snap_src-&gt;lock);
+		if (!snap_src-&gt;is_handover_source) {
+			up_write(&amp;snap_src-&gt;lock);
+			goto normal_snapshot;
+		}
+		if (s == snap_src) {
+			DMERR("Cancelling shapshot handover.");
+			s-&gt;valid = 0;
+		}
+		/* allow table_clear to cancel handover */
+		snap_src-&gt;is_handover_source = 0;
+		up_write(&amp;snap_src-&gt;lock);
+	}
+
+normal_snapshot:
 	/* Prevent further origin writes from using this snapshot. */
 	/* After this returns there can be no new kcopyd jobs. */
 	unregister_snapshot(s);
@@ -1198,9 +1327,65 @@ static void snapshot_postsuspend(struct 
 	up_write(&amp;s-&gt;lock);
 }
 
+/* Veto resumes that cannot work: a handover source, a handover whose source
+ * is still running, or a cancelled handover (chunk_size zeroed, source gone).
+ */
+static int snapshot_preresume(struct dm_target *ti)
+{
+	int r = 0;
+	struct dm_snapshot *s = ti-&gt;private;
+	struct dm_snapshot *snap_src = find_snapshot_using_cow(s);
+
+	if (snap_src) {
+		down_write(&amp;snap_src-&gt;lock);
+		if (snap_src-&gt;is_handover_source) {
+			if (s == snap_src) {
+				DMERR("Unable to resume snapshot: complete "
+				      "or cancel exception handover first.");
+				r = -EINVAL;
+			} else if (!snap_src-&gt;suspended) {
+				DMERR("Unable to perform snapshot handover: "
+				      "suspend source device first.");
+				r = -EINVAL;
+			}
+		}
+		up_write(&amp;snap_src-&gt;lock);
+	} else if (!s-&gt;store-&gt;chunk_size) {
+		DMERR("Unable to resume snapshot from cancelled handover.");
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
 static void snapshot_resume(struct dm_target *ti)
 {
 	struct dm_snapshot *s = ti-&gt;private;
+	struct dm_snapshot *snap_src = NULL;
+	struct dm_snapshot *snap_dest = NULL;
+
+	/* Check if snapshot needs exceptions handed over to it */
+	snap_src = find_snapshot_using_cow(s);
+	if (snap_src) {
+		down_write_nested(&amp;snap_src-&gt;lock, SINGLE_DEPTH_NESTING);
+		if (snap_src-&gt;is_handover_source) {
+			BUG_ON(s == snap_src);
+			snap_dest = s;
+			down_write(&amp;snap_dest-&gt;lock);
+			/* Get exception store from another snapshot */
+			handover_exceptions(snap_src, snap_dest);
+			up_write(&amp;snap_dest-&gt;lock);
+		}
+		up_write(&amp;snap_src-&gt;lock);
+
+		if (snap_dest &amp;&amp; register_snapshot(snap_dest, 1)) {
+			DMERR("Unable to register snapshot "
+			      "after exception handover.");
+			down_write(&amp;snap_dest-&gt;lock);
+			snap_dest-&gt;valid = 0;
+			up_write(&amp;snap_dest-&gt;lock);
+		}
+	}
 
 	down_write(&amp;s-&gt;lock);
 	s-&gt;active = 1;
@@ -1518,6 +1703,7 @@ static struct target_type snapshot_targe
 	.map     = snapshot_map,
 	.end_io  = snapshot_end_io,
 	.postsuspend = snapshot_postsuspend,
+	.preresume  = snapshot_preresume,
 	.resume  = snapshot_resume,
 	.status  = snapshot_status,
 	.iterate_devices = snapshot_iterate_devices,
</pre></body></html>