From: Jonathan Brassow <jbrassow@redhat.com>

This patch gives mirror the ability to handle write failures
during recovery.

When kcopyd finishes resynchronizing a mirror region, it
calls recovery_complete() with the results, which are
currently ignored.  This patch checks the bits in
'write_err' and calls a new function, fail_mirror(), on each
device whose bit is set.  fail_mirror() increments the
as-yet-unused error_count on the mirror device.  Switching
the primary device pointer when the mirror set is in-sync is
not implemented yet: failing the default mirror hits BUG().

To maintain backwards compatibility, fail_mirror does nothing
if the DM_FEATURES_HANDLE_ERRORS flag is not present.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

---
 drivers/md/dm-raid1.c |   58 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 53 insertions(+), 5 deletions(-)

Index: linux-2.6.24-rc5/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.24-rc5.orig/drivers/md/dm-raid1.c	2007-12-12 12:59:53.000000000 +0000
+++ linux-2.6.24-rc5/drivers/md/dm-raid1.c	2007-12-12 15:43:59.000000000 +0000
@@ -646,6 +646,35 @@ static void bio_set_ms(struct bio *bio, 
 	bio->bi_next = (struct bio *) ms;
 }
 
+/* fail_mirror
+ * @m: mirror device to fail
+ *
+ * No-op unless errors_handled(ms).  Otherwise bump m's
+ * error_count; only the first failure proceeds further.
+ * Picking an alternative primary when the default mirror
+ * fails is not implemented here: that case hits BUG().
+ *
+ * This function must not block.
+ */
+static void fail_mirror(struct mirror *m)
+{
+	struct mirror_set *ms = m->ms;
+	struct mirror *new; /* NOTE(review): not used below */
+
+	if (!errors_handled(ms))
+		return;
+
+	if (atomic_inc_return(&m->error_count) > 1)
+		return;
+
+	/*
+	 * Currently, fail_mirror never happens on the default_mirror.
+	 * IOW, the default_mirror currently never changes.
+	 */
+	if (m == ms->default_mirror)
+		BUG();
+}
+
 /*-----------------------------------------------------------------
  * Recovery.
  *
@@ -656,16 +685,34 @@ static void bio_set_ms(struct bio *bio, 
 static void recovery_complete(int read_err, unsigned int write_err,
 			      void *context)
 {
-	struct region *reg = (struct region *) context;
+	struct region *reg = (struct region *)context;
+	struct mirror_set *ms = reg->rh->ms;
+	unsigned long write_err_ulong = (unsigned long)write_err;
+	unsigned m;
+	int bit = 0;
 
 	if (read_err)
-		/* Read error means the failure of default mirror. */
 		DMERR_LIMIT("Unable to read primary mirror during recovery");
 
-	if (write_err)
-		DMERR_LIMIT("Write error during recovery (error = 0x%x)",
-			    write_err);
+	if (!write_err)
+		goto out;
+
+	DMERR_LIMIT("Write error during recovery: 0x%x", write_err);
+
+	/*
+	 * Bits correspond to devices excluding default mirror.
+	 * The default mirror cannot change during recovery.
+	 */
+	for (m = 0; m < ms->nr_mirrors; m++) {
+		if (&ms->mirror[m] == ms->default_mirror)
+			continue;
+
+		if (test_bit(bit, &write_err_ulong))
+			fail_mirror(ms->mirror + m);
+		bit++;
+	}
 
+      out:
 	rh_recovery_end(reg, !(read_err || write_err));
 }
 
@@ -1019,6 +1066,7 @@ static int get_mirror(struct mirror_set 
 	}
 
 	ms->mirror[mirror].ms = ms;
+	atomic_set(&(ms->mirror[mirror].error_count), 0);
 	ms->mirror[mirror].offset = offset;
 
 	return 0;
