Bugfixes for md/raid1

particularly, but not only, fixing new "resync" code. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQIVAwUAVCIoAznsnt1WYoG5AQJDzRAAtciwFilYXxu8M7fPOQ/HZeoMtNLVX0dK cvL5yRhNxfoGLIG7TEeb5Wvd8cxHNR5t4x+jGmipJ7cTGE4S6Edgdpy2yhHDFBdo AyGgYreX441P07cefPUUa9nTVFlqx2TzJa+SR75CmBwbuZpx52kfHK9KMXWljY+Q Hm60k34tK4zzC5Tm2E7aeegFjUaIAwrpt3TOJlh8E/JiEQDsVz2+o+7RFwPXrXgm YnxfpaAcw5XcanlUj0q6r6O86hhItO54sBBcTtTNZtD7oZC82/OYj6SxlG0V3D2a wBFouI518Rf0TmdtG3XgPAfI0eCZyowZtYmpoYX+/8rkGSy2ZmJfxSY2NzmGBmX4 LtH0tYkp2qSu6WCXUMPOLmPRqQuT6iX4ho7KCNMr2n05kHMom/InNUajWUvqPFdE eBs27u9HngTVCTMpwdCfFV/qWXszEhpp9wyzAv5zRV7gyc3hZM3cQ1iV2GKor8Ka wSTeDT+gY9J2sCJgqx7li45jpsZPzayupwW+hBvieKeY6/fM1leur4Ji/mcRXytK YUci6fiy2kwxs1uzFq7Kra3Y5gqGq+S6HCspmZTtstzFxbKcMTmOC1B2ukKDPvGS HwXnQ6w+fXmF/+fXWD98++ET80rWj6utXBJhSGhkdQcyYRz5DU/2GsLsA4yvho4N Dbo2gIjTtD8= =gMsu -----END PGP SIGNATURE----- Merge tag 'md/3.17-more-fixes' of git://git.neil.brown.name/md Pull bugfixes for md/raid1 from Neil Brown: "It is amazing how much easier it is to find bugs when you know one is there. Two bug reports resulted in finding 7 bugs! All are tagged for -stable. Those that can't cause (rare) data corruption, cause lockups. Particularly, but not only, fixing new "resync" code" * tag 'md/3.17-more-fixes' of git://git.neil.brown.name/md: md/raid1: fix_read_error should act on all non-faulty devices. md/raid1: count resync requests in nr_pending. md/raid1: update next_resync under resync_lock. md/raid1: Don't use next_resync to determine how far resync has progressed md/raid1: make sure resync waits for conflicting writes to complete. md/raid1: clean up request counts properly in close_sync() md/raid1: be more cautious where we read-balance during resync. md/raid1: intialise start_next_window for READ case to avoid hang
2014-09-24 08:53:33 -07:00 · 2014-09-24 08:53:33 -07:00 · a90e41e228
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@ -540,11 +540,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	has_nonrot_disk = 0;
 	choose_next_idle = 0;

-	if (conf->mddev->recovery_cp < MaxSector &&
-	    (this_sector + sectors >= conf->next_resync))
-		choose_first = 1;
-	else
-		choose_first = 0;
+	choose_first = (conf->mddev->recovery_cp < this_sector + sectors);

 	for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
 		sector_t dist;
@ -831,7 +827,7 @@ static void flush_pending_writes(struct r1conf *conf)
 *    there is no normal IO happeing.  It must arrange to call
 *    lower_barrier when the particular background IO completes.
 */
-static void raise_barrier(struct r1conf *conf)
+static void raise_barrier(struct r1conf *conf, sector_t sector_nr)
 {
 	spin_lock_irq(&conf->resync_lock);

@ -841,6 +837,7 @@ static void raise_barrier(struct r1conf *conf)

 	/* block any new IO from starting */
 	conf->barrier++;
+	conf->next_resync = sector_nr;

 	/* For these conditions we must wait:
 	 * A: while the array is in frozen state
@ -849,14 +846,17 @@ static void raise_barrier(struct r1conf *conf)
 	 * C: next_resync + RESYNC_SECTORS > start_next_window, meaning
 	 *    next resync will reach to the window which normal bios are
 	 *    handling.
+	 * D: while there are any active requests in the current window.
 	 */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->array_frozen &&
 			    conf->barrier < RESYNC_DEPTH &&
+			    conf->current_window_requests == 0 &&
 			    (conf->start_next_window >=
 			     conf->next_resync + RESYNC_SECTORS),
 			    conf->resync_lock);

+	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
 }

@ -866,6 +866,7 @@ static void lower_barrier(struct r1conf *conf)
 	BUG_ON(conf->barrier <= 0);
 	spin_lock_irqsave(&conf->resync_lock, flags);
 	conf->barrier--;
+	conf->nr_pending--;
 	spin_unlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
@ -877,12 +878,10 @@ static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
 	if (conf->array_frozen || !bio)
 		wait = true;
 	else if (conf->barrier && bio_data_dir(bio) == WRITE) {
-		if (conf->next_resync < RESYNC_WINDOW_SECTORS)
-			wait = true;
-		else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
-				>= bio_end_sector(bio)) ||
-			 (conf->next_resync + NEXT_NORMALIO_DISTANCE
-				<= bio->bi_iter.bi_sector))
+		if ((conf->mddev->curr_resync_completed
+		     >= bio_end_sector(bio)) ||
+		    (conf->next_resync + NEXT_NORMALIO_DISTANCE
+		     <= bio->bi_iter.bi_sector))
 			wait = false;
 		else
 			wait = true;
@ -919,8 +918,8 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
 	}

 	if (bio && bio_data_dir(bio) == WRITE) {
-		if (conf->next_resync + NEXT_NORMALIO_DISTANCE
-		    <= bio->bi_iter.bi_sector) {
+		if (bio->bi_iter.bi_sector >=
+		    conf->mddev->curr_resync_completed) {
 			if (conf->start_next_window == MaxSector)
 				conf->start_next_window =
 					conf->next_resync +
@ -1186,6 +1185,7 @@ read_again:
 				   atomic_read(&bitmap->behind_writes) == 0);
 		}
 		r1_bio->read_disk = rdisk;
+		r1_bio->start_next_window = 0;

 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
 		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
@ -1548,8 +1548,13 @@ static void close_sync(struct r1conf *conf)
 	mempool_destroy(conf->r1buf_pool);
 	conf->r1buf_pool = NULL;

+	spin_lock_irq(&conf->resync_lock);
 	conf->next_resync = 0;
 	conf->start_next_window = MaxSector;
+	conf->current_window_requests +=
+		conf->next_window_requests;
+	conf->next_window_requests = 0;
+	spin_unlock_irq(&conf->resync_lock);
 }

 static int raid1_spare_active(struct mddev *mddev)
@ -2150,7 +2155,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			d--;
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
-			    test_bit(In_sync, &rdev->flags))
+			    !test_bit(Faulty, &rdev->flags))
 				r1_sync_page_io(rdev, sect, s,
 						conf->tmppage, WRITE);
 		}
@ -2162,7 +2167,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			d--;
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
-			    test_bit(In_sync, &rdev->flags)) {
+			    !test_bit(Faulty, &rdev->flags)) {
 				if (r1_sync_page_io(rdev, sect, s,
 						    conf->tmppage, READ)) {
 					atomic_add(s, &rdev->corrected_errors);
@ -2541,9 +2546,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp

 	bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
-	raise_barrier(conf);

-	conf->next_resync = sector_nr;
+	raise_barrier(conf, sector_nr);

 	rcu_read_lock();
 	/*