diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-05 17:52:22 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-05 17:52:22 -0700 |
| commit | 2a013e37ce691a7c072df27b35e9790fc8f5a82f (patch) | |
| tree | 0d81e512ebb93484602a0802396ed7c1f465d3b4 /drivers/md/raid1.c | |
| parent | 17447717a3266965e257d3eae79d89539ce3ec0a (diff) | |
| parent | e89c6fdf9e0eb1b5a03574d4ca73e83eae8deb91 (diff) | |
Merge tag 'md/4.3' of git://neil.brown.name/md
Pull md updates from Neil Brown:
- an assortment of little fixes, several for minor races only likely to
be hit during testing
- further cluster-md-raid1 development, not ready for real use yet.
- new RAID6 syndrome code for ARM NEON
- fix a race where a write can return before failure of one device is
properly recorded in metadata, so an immediate crash might result in
that write being lost.
* tag 'md/4.3' of git://neil.brown.name/md: (33 commits)
md/raid5: ensure device failure recorded before write request returns.
md/raid5: use bio_list for the list of bios to return.
md/raid10: ensure device failure recorded before write request returns.
md/raid1: ensure device failure recorded before write request returns.
md-cluster: remove inappropriate try_module_get from join()
md: extend spinlock protection in register_md_cluster_operations
md-cluster: Read the disk bitmap sb and check if it needs recovery
md-cluster: only call complete(&cinfo->completion) when node join cluster
md-cluster: add missed lockres_free
md-cluster: remove the unused sb_lock
md-cluster: init suspend_list and suspend_lock early in join
md-cluster: add the error check if failed to get dlm lock
md-cluster: init completion within lockres_init
md-cluster: fix deadlock issue on message lock
md-cluster: transfer the resync ownership to another node
md-cluster: split recover_slot for future code reuse
md-cluster: use %pU to print UUIDs
md: setup safemode_timer before it's being used
md/raid5: handle possible race as reshape completes.
md: sync sync_completed has correct value as recovery finishes.
...
Diffstat (limited to 'drivers/md/raid1.c')
| -rw-r--r-- | drivers/md/raid1.c | 30 |
1 files changed, 29 insertions, 1 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f39d69f884de..4517f06c41ba 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1474,6 +1474,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_CHANGE_DEVS, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" "md/raid1:%s: Operation continuing on %d devices.\n", @@ -2235,6 +2236,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) { int m; + bool fail = false; for (m = 0; m < conf->raid_disks * 2 ; m++) if (r1_bio->bios[m] == IO_MADE_GOOD) { struct md_rdev *rdev = conf->mirrors[m].rdev; @@ -2247,6 +2249,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) * narrow down and record precise write * errors. */ + fail = true; if (!narrow_write_error(r1_bio, m)) { md_error(conf->mddev, conf->mirrors[m].rdev); @@ -2258,7 +2261,13 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) } if (test_bit(R1BIO_WriteError, &r1_bio->state)) close_write(r1_bio); - raid_end_bio_io(r1_bio); + if (fail) { + spin_lock_irq(&conf->device_lock); + list_add(&r1_bio->retry_list, &conf->bio_end_io_list); + spin_unlock_irq(&conf->device_lock); + md_wakeup_thread(conf->mddev->thread); + } else + raid_end_bio_io(r1_bio); } static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) @@ -2364,6 +2373,23 @@ static void raid1d(struct md_thread *thread) md_check_recovery(mddev); + if (!list_empty_careful(&conf->bio_end_io_list) && + !test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + LIST_HEAD(tmp); + spin_lock_irqsave(&conf->device_lock, flags); + if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + list_add(&tmp, &conf->bio_end_io_list); + list_del_init(&conf->bio_end_io_list); + } + spin_unlock_irqrestore(&conf->device_lock, flags); + while (!list_empty(&tmp)) { + r1_bio = list_first_entry(&conf->bio_end_io_list, + struct r1bio, retry_list); + list_del(&r1_bio->retry_list); + raid_end_bio_io(r1_bio); + } + } + blk_start_plug(&plug); for (;;) { @@ -2763,6 +2789,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) conf->raid_disks = mddev->raid_disks; conf->mddev = mddev; INIT_LIST_HEAD(&conf->retry_list); + INIT_LIST_HEAD(&conf->bio_end_io_list); spin_lock_init(&conf->resync_lock); init_waitqueue_head(&conf->wait_barrier); @@ -3057,6 +3084,7 @@ static int raid1_reshape(struct mddev *mddev) unfreeze_array(conf); + set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); |
