From: NeilBrown <neilb@cse.unsw.edu.au>

Instead of having a single end_request handler that must determine whether
it is completing a read or a write request, use two separate handlers,
raid1_end_read_request() and raid1_end_write_request(), which makes each of
them easier to follow.



---

 drivers/md/raid1.c |  102 ++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 66 insertions(+), 36 deletions(-)

diff -puN drivers/md/raid1.c~md-02-split-end_request-handlers drivers/md/raid1.c
--- 25/drivers/md/raid1.c~md-02-split-end_request-handlers	2004-02-05 22:05:09.000000000 -0800
+++ 25-akpm/drivers/md/raid1.c	2004-02-05 22:05:09.000000000 -0800
@@ -261,7 +261,7 @@ static inline void update_head_pos(int d
 		r1_bio->sector + (r1_bio->master_bio->bi_size >> 9);
 }
 
-static int raid1_end_request(struct bio *bio, unsigned int bytes_done, int error)
+static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
@@ -271,13 +271,7 @@ static int raid1_end_request(struct bio 
 	if (bio->bi_size)
 		return 1;
 	
-	if (r1_bio->cmd == READ || r1_bio->cmd == READA)
-		mirror = r1_bio->read_disk;
-	else {
-		for (mirror = 0; mirror < conf->raid_disks; mirror++)
-			if (r1_bio->write_bios[mirror] == bio)
-				break;
-	}
+	mirror = r1_bio->read_disk;
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
@@ -296,42 +290,78 @@ static int raid1_end_request(struct bio 
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 
 	update_head_pos(mirror, r1_bio);
-	if ((r1_bio->cmd == READ) || (r1_bio->cmd == READA)) {
-		if (!r1_bio->read_bio)
-			BUG();
+
+	if (!r1_bio->read_bio)
+		BUG();
+	/*
+	 * we have only one bio on the read side
+	 */
+	if (uptodate)
+		raid_end_bio_io(r1_bio);
+	else {
 		/*
-		 * we have only one bio on the read side
+		 * oops, read error:
 		 */
-		if (uptodate)
-			raid_end_bio_io(r1_bio);
-		else {
-			/*
-			 * oops, read error:
-			 */
-			char b[BDEVNAME_SIZE];
-			printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n",
-				bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
-			reschedule_retry(r1_bio);
-		}
-	} else {
+		char b[BDEVNAME_SIZE];
+		printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n",
+		       bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
+		reschedule_retry(r1_bio);
+	}
 
-		if (r1_bio->read_bio)
-			BUG();
+	atomic_dec(&conf->mirrors[mirror].rdev->nr_pending);
+	return 0;
+}
+
+static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
+{
+	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+	int mirror;
+	conf_t *conf = mddev_to_conf(r1_bio->mddev);
+
+	if (bio->bi_size)
+		return 1;
+
+	for (mirror = 0; mirror < conf->raid_disks; mirror++)
+		if (r1_bio->write_bios[mirror] == bio)
+			break;
+
+	/*
+	 * this branch is our 'one mirror IO has finished' event handler:
+	 */
+	if (!uptodate)
+		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
+	else
 		/*
-		 * WRITE:
+		 * Set R1BIO_Uptodate in our master bio, so that
+		 * we will return a good error code to the higher
+		 * levels even if IO on some other mirrored buffer fails.
 		 *
-		 * Let's see if all mirrored write operations have finished
-		 * already.
+		 * The 'master' represents the composite IO operation to
+		 * user-side. So if something waits for IO, then it will
+		 * wait for the 'master' bio.
 		 */
-		if (atomic_dec_and_test(&r1_bio->remaining)) {
-			md_write_end(r1_bio->mddev);
-			raid_end_bio_io(r1_bio);
-		}	
-	}
+		set_bit(R1BIO_Uptodate, &r1_bio->state);
+
+	update_head_pos(mirror, r1_bio);
+
+	if (r1_bio->read_bio)
+		BUG();
+	/*
+	 *
+	 * Let's see if all mirrored write operations have finished
+	 * already.
+	 */
+	if (atomic_dec_and_test(&r1_bio->remaining)) {
+		md_write_end(r1_bio->mddev);
+		raid_end_bio_io(r1_bio);
+	}
+
 	atomic_dec(&conf->mirrors[mirror].rdev->nr_pending);
 	return 0;
 }
 
+
 /*
  * This routine returns the disk from which the requested read should
  * be done. There is a per-array 'next expected sequential IO' sector
@@ -508,7 +538,7 @@ static int make_request(request_queue_t 
 
 		read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
 		read_bio->bi_bdev = mirror->rdev->bdev;
-		read_bio->bi_end_io = raid1_end_request;
+		read_bio->bi_end_io = raid1_end_read_request;
 		read_bio->bi_rw = r1_bio->cmd;
 		read_bio->bi_private = r1_bio;
 
@@ -546,7 +576,7 @@ static int make_request(request_queue_t 
 
 		mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-		mbio->bi_end_io	= raid1_end_request;
+		mbio->bi_end_io	= raid1_end_write_request;
 		mbio->bi_rw = r1_bio->cmd;
 		mbio->bi_private = r1_bio;
 

_