Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--  drivers/md/raid1.c  100
1 file changed, 74 insertions, 26 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a368db2431a5..4a40a200d769 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
 		    || rdev == NULL
+		    || test_bit(Unmerged, &rdev->flags)
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
 		if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	return best_disk;
 }
 
+static int raid1_mergeable_bvec(struct request_queue *q,
+				struct bvec_merge_data *bvm,
+				struct bio_vec *biovec)
+{
+	struct mddev *mddev = q->queuedata;
+	struct r1conf *conf = mddev->private;
+	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+	int max = biovec->bv_len;
+
+	if (mddev->merge_check_needed) {
+		int disk;
+		rcu_read_lock();
+		for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+			struct md_rdev *rdev = rcu_dereference(
+				conf->mirrors[disk].rdev);
+			if (rdev && !test_bit(Faulty, &rdev->flags)) {
+				struct request_queue *q =
+					bdev_get_queue(rdev->bdev);
+				if (q->merge_bvec_fn) {
+					bvm->bi_sector = sector +
+						rdev->data_offset;
+					bvm->bi_bdev = rdev->bdev;
+					max = min(max, q->merge_bvec_fn(
+							  q, bvm, biovec));
+				}
+			}
+		}
+		rcu_read_unlock();
+	}
+	return max;
+
+}
+
 int md_raid1_congested(struct mddev *mddev, int bits)
 {
 	struct r1conf *conf = mddev->private;
@@ -624,7 +658,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)
 		return 1;
 
 	rcu_read_lock();
-	for (i = 0; i < conf->raid_disks; i++) {
+	for (i = 0; i < conf->raid_disks * 2; i++) {
 		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev && !test_bit(Faulty, &rdev->flags)) {
 			struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -737,9 +771,22 @@ static void wait_barrier(struct r1conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
 		conf->nr_waiting++;
-		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+		/* Wait for the barrier to drop.
+		 * However if there are already pending
+		 * requests (preventing the barrier from
+		 * rising completely), and the
+		 * pre-process bio queue isn't empty,
+		 * then don't wait, as we need to empty
+		 * that queue to get the nr_pending
+		 * count down.
+		 */
+		wait_event_lock_irq(conf->wait_barrier,
+				    !conf->barrier ||
+				    (conf->nr_pending &&
+				     current->bio_list &&
+				     !bio_list_empty(current->bio_list)),
 				    conf->resync_lock,
-				    );
+			);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1002,7 +1049,8 @@ read_again:
 			break;
 		}
 		r1_bio->bios[i] = NULL;
-		if (!rdev || test_bit(Faulty, &rdev->flags)) {
+		if (!rdev || test_bit(Faulty, &rdev->flags)
+		    || test_bit(Unmerged, &rdev->flags)) {
 			if (i < conf->raid_disks)
 				set_bit(R1BIO_Degraded, &r1_bio->state);
 			continue;
@@ -1322,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct mirror_info *p;
 	int first = 0;
 	int last = conf->raid_disks - 1;
+	struct request_queue *q = bdev_get_queue(rdev->bdev);
 
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
@@ -1329,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
+	if (q->merge_bvec_fn) {
+		set_bit(Unmerged, &rdev->flags);
+		mddev->merge_check_needed = 1;
+	}
+
 	for (mirror = first; mirror <= last; mirror++) {
 		p = conf->mirrors+mirror;
 		if (!p->rdev) {
 
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
-			/* as we don't honour merge_bvec_fn, we must
-			 * never risk violating it, so limit
-			 * ->max_segments to one lying with a single
-			 * page, as a one page request is never in
-			 * violation.
-			 */
-			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-				blk_queue_max_segments(mddev->queue, 1);
-				blk_queue_segment_boundary(mddev->queue,
-							   PAGE_CACHE_SIZE - 1);
-			}
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -1370,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
+	if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+		/* Some requests might not have seen this new
+		 * merge_bvec_fn.  We must wait for them to complete
+		 * before merging the device fully.
+		 * First we make sure any code which has tested
+		 * our function has submitted the request, then
+		 * we wait for all outstanding requests to complete.
+		 */
+		synchronize_sched();
+		raise_barrier(conf);
+		lower_barrier(conf);
+		clear_bit(Unmerged, &rdev->flags);
+	}
 	md_integrity_add_rdev(rdev, mddev);
 	print_conf(conf);
 	return err;
@@ -2491,7 +2547,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	err = -EINVAL;
 
 	spin_lock_init(&conf->device_lock);
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		int disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
@@ -2609,20 +2665,11 @@ static int run(struct mddev *mddev)
 	if (IS_ERR(conf))
 		return PTR_ERR(conf);
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (!mddev->gendisk)
 			continue;
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_segments to 1 lying within
-		 * a single page, as a one page request is never in violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 	}
 
 	mddev->degraded = 0;
@@ -2656,6 +2703,7 @@ static int run(struct mddev *mddev)
 	if (mddev->queue) {
 		mddev->queue->backing_dev_info.congested_fn = raid1_congested;
 		mddev->queue->backing_dev_info.congested_data = mddev;
+		blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
 	}
 	return md_integrity_register(mddev);
 }
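The heart of this change is raid1_mergeable_bvec(). Previously RAID1 refused to honour a member's merge_bvec_fn and instead clamped the whole array to single-page requests whenever any member defined the hook; now it answers the merge query itself, re-expressing the proposed bio in each member's address space and taking the minimum of what every member will accept. Below is a minimal sketch of that stacking pattern for a hypothetical driver sitting on a single backing device. my_dev, backing_bdev and data_offset are illustrative names, not part of this patch, and the merge_bvec_fn hook itself belongs to kernels of this era (it was removed later, in 4.3):

	#include <linux/blkdev.h>

	/* Hypothetical per-device state, for the sketch only. */
	struct my_dev {
		struct block_device *backing_bdev;
		sector_t data_offset;	/* where our data starts on the member */
	};

	static int my_mergeable_bvec(struct request_queue *q,
				     struct bvec_merge_data *bvm,
				     struct bio_vec *biovec)
	{
		struct my_dev *dev = q->queuedata;
		struct request_queue *bq = bdev_get_queue(dev->backing_bdev);
		int max = biovec->bv_len;	/* default: accept the whole vec */

		if (bq->merge_bvec_fn) {
			/* Rewrite the query so it describes the I/O as the
			 * backing device will actually see it, then let the
			 * backing device impose its own limit.
			 */
			bvm->bi_sector += dev->data_offset;
			bvm->bi_bdev = dev->backing_bdev;
			max = min(max, bq->merge_bvec_fn(bq, bvm, biovec));
		}
		return max;
	}

The raid1 version additionally folds get_start_sect(bvm->bi_bdev) into the sector so queries arriving through a partition are translated correctly, walks all conf->raid_disks * 2 slots under rcu_read_lock() (the doubled range covers the replacement-device slots), and skips the walk entirely unless mddev->merge_check_needed records that some member actually defines the hook.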
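The Unmerged flag closes a hot-add race: a request built before the new member became visible was never checked against that member's merge_bvec_fn, so it must not be sent to the new disk. raid1_add_disk() therefore publishes the rdev with Unmerged set (read_balance() and the write path skip such devices, per the hunks above) and clears it only after the old requests have drained. The tail of raid1_add_disk() again, condensed, with the mechanism spelled out in my own comments:

	if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
		/* Grace period: any submitter that sampled the merge
		 * state before Unmerged became visible has, by now,
		 * finished submitting its request.
		 */
		synchronize_sched();

		/* Drain: an empty barrier raise/lower waits for every
		 * outstanding request (nr_pending) to complete.
		 */
		raise_barrier(conf);
		lower_barrier(conf);

		/* Only now may requests be routed to this disk. */
		clear_bit(Unmerged, &rdev->flags);
	}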
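That drain is also why wait_barrier() had to change. raise_barrier() cannot complete while nr_pending is nonzero, and nr_pending cannot fall if the thread owning those pending requests is itself asleep in wait_barrier() with unsubmitted bios parked on current->bio_list, which is where generic_make_request() queues bios submitted from within make_request. The new wake condition breaks that cycle; annotated:

	/* Proceed when either:
	 *  - no resync barrier is up; or
	 *  - a barrier is trying to rise but cannot (nr_pending != 0)
	 *    and this thread still holds unsubmitted bios on
	 *    current->bio_list, bios that must be issued before
	 *    nr_pending can ever drop to zero.
	 * The empty fourth argument is this era's wait_event_lock_irq()
	 * pre-schedule command slot.
	 */
	wait_event_lock_irq(conf->wait_barrier,
			    !conf->barrier ||
			    (conf->nr_pending &&
			     current->bio_list &&
			     !bio_list_empty(current->bio_list)),
			    conf->resync_lock,
		);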
