aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2011-10-19 14:30:42 +0200
committerJens Axboe <axboe@kernel.dk>2011-10-19 14:30:42 +0200
commit5c04b426f2e8b46cfc7969a35b2631063a3c646c (patch)
tree2d27d9f5d2fe5d5e8fbc01a467ec58bcb50235c1 /drivers/md
parent499337bb6511e665a236a6a947f819d98ea340c6 (diff)
parent899e3ee404961a90b828ad527573aaaac39f0ab1 (diff)
Merge branch 'v3.1-rc10' into for-3.2/core
Conflicts: block/blk-core.c include/linux/blkdev.h Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/md/dm-flakey.c4
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-table.c32
-rw-r--r--drivers/md/linear.h2
-rw-r--r--drivers/md/md.c50
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/raid1.c17
-rw-r--r--drivers/md/raid10.c52
-rw-r--r--drivers/md/raid5.c8
11 files changed, 116 insertions, 58 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 49da55c1528a..8c2a000cf3f5 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1698,6 +1698,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->num_flush_requests = 1;
+ ti->discard_zeroes_data_unsupported = 1;
+
return 0;
bad:
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 89f73ca22cfa..f84c08029b21 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -81,8 +81,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
* corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>
*/
if (!strcasecmp(arg_name, "corrupt_bio_byte")) {
- if (!argc)
+ if (!argc) {
ti->error = "Feature corrupt_bio_byte requires parameters";
+ return -EINVAL;
+ }
r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error);
if (r)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index a002dd85db1e..86df8b2cf927 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -449,7 +449,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
rs->ti->error = "write_mostly option is only valid for RAID1";
return -EINVAL;
}
- if (value > rs->md.raid_disks) {
+ if (value >= rs->md.raid_disks) {
rs->ti->error = "Invalid write_mostly drive index given";
return -EINVAL;
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 986b8754bb08..bc04518e9d8b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1238,14 +1238,15 @@ static void dm_table_set_integrity(struct dm_table *t)
return;
template_disk = dm_table_get_integrity_disk(t, true);
- if (!template_disk &&
- blk_integrity_is_initialized(dm_disk(t->md))) {
+ if (template_disk)
+ blk_integrity_register(dm_disk(t->md),
+ blk_get_integrity(template_disk));
+ else if (blk_integrity_is_initialized(dm_disk(t->md)))
DMWARN("%s: device no longer has a valid integrity profile",
dm_device_name(t->md));
- return;
- }
- blk_integrity_register(dm_disk(t->md),
- blk_get_integrity(template_disk));
+ else
+ DMWARN("%s: unable to establish an integrity profile",
+ dm_device_name(t->md));
}
static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
@@ -1282,6 +1283,22 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
return 0;
}
+static bool dm_table_discard_zeroes_data(struct dm_table *t)
+{
+ struct dm_target *ti;
+ unsigned i = 0;
+
+ /* Ensure that all targets supports discard_zeroes_data. */
+ while (i < dm_table_get_num_targets(t)) {
+ ti = dm_table_get_target(t, i++);
+
+ if (ti->discard_zeroes_data_unsupported)
+ return 0;
+ }
+
+ return 1;
+}
+
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1304,6 +1321,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
}
blk_queue_flush(q, flush);
+ if (!dm_table_discard_zeroes_data(t))
+ q->limits.discard_zeroes_data = 0;
+
dm_table_set_integrity(t);
/*
diff --git a/drivers/md/linear.h b/drivers/md/linear.h
index 0ce29b61605a..2f2da05b2ce9 100644
--- a/drivers/md/linear.h
+++ b/drivers/md/linear.h
@@ -10,9 +10,9 @@ typedef struct dev_info dev_info_t;
struct linear_private_data
{
+ struct rcu_head rcu;
sector_t array_sectors;
dev_info_t disks[0];
- struct rcu_head rcu;
};
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5c2178562c96..8f52d4eb78a0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -61,6 +61,11 @@
static void autostart_arrays(int part);
#endif
+/* pers_list is a list of registered personalities protected
+ * by pers_lock.
+ * pers_lock does extra service to protect accesses to
+ * mddev->thread when the mutex cannot be held.
+ */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
@@ -735,7 +740,12 @@ static void mddev_unlock(mddev_t * mddev)
} else
mutex_unlock(&mddev->reconfig_mutex);
+ /* was we've dropped the mutex we need a spinlock to
+ * make sur the thread doesn't disappear
+ */
+ spin_lock(&pers_lock);
md_wakeup_thread(mddev->thread);
+ spin_unlock(&pers_lock);
}
static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
@@ -844,7 +854,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
bio->bi_end_io = super_written;
atomic_inc(&mddev->pending_writes);
- submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
+ submit_bio(WRITE_FLUSH_FUA, bio);
}
void md_super_wait(mddev_t *mddev)
@@ -1134,8 +1144,11 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
ret = 0;
}
rdev->sectors = rdev->sb_start;
+ /* Limit to 4TB as metadata cannot record more than that */
+ if (rdev->sectors >= (2ULL << 32))
+ rdev->sectors = (2ULL << 32) - 2;
- if (rdev->sectors < sb->size * 2 && sb->level > 1)
+ if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
/* "this cannot possibly happen" ... */
ret = -EINVAL;
@@ -1169,7 +1182,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->clevel[0] = 0;
mddev->layout = sb->layout;
mddev->raid_disks = sb->raid_disks;
- mddev->dev_sectors = sb->size * 2;
+ mddev->dev_sectors = ((sector_t)sb->size) * 2;
mddev->events = ev1;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
@@ -1411,6 +1424,11 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
rdev->sb_start = calc_dev_sboffset(rdev);
if (!num_sectors || num_sectors > rdev->sb_start)
num_sectors = rdev->sb_start;
+ /* Limit to 4TB as metadata cannot record more than that.
+ * 4TB == 2^32 KB, or 2*2^32 sectors.
+ */
+ if (num_sectors >= (2ULL << 32))
+ num_sectors = (2ULL << 32) - 2;
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
md_super_wait(rdev->mddev);
@@ -1734,6 +1752,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
+ if (test_bit(WriteMostly, &rdev->flags))
+ sb->devflags |= WriteMostly1;
+ else
+ sb->devflags &= ~WriteMostly1;
+
if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -2557,7 +2580,10 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
- err = 0;
+ if (test_bit(Faulty, &rdev->flags))
+ err = 0;
+ else
+ err = -EBUSY;
} else if (cmd_match(buf, "remove")) {
if (rdev->raid_disk >= 0)
err = -EBUSY;
@@ -2580,7 +2606,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
err = 0;
} else if (cmd_match(buf, "-blocked")) {
if (!test_bit(Faulty, &rdev->flags) &&
- test_bit(BlockedBadBlocks, &rdev->flags)) {
+ rdev->badblocks.unacked_exist) {
/* metadata handler doesn't understand badblocks,
* so we need to fail the device
*/
@@ -5979,6 +6005,8 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev)
return -ENODEV;
md_error(mddev, rdev);
+ if (!test_bit(Faulty, &rdev->flags))
+ return -EBUSY;
return 0;
}
@@ -6407,11 +6435,18 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
return thread;
}
-void md_unregister_thread(mdk_thread_t *thread)
+void md_unregister_thread(mdk_thread_t **threadp)
{
+ mdk_thread_t *thread = *threadp;
if (!thread)
return;
dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
+ /* Locking ensures that mddev_unlock does not wake_up a
+ * non-existent thread
+ */
+ spin_lock(&pers_lock);
+ *threadp = NULL;
+ spin_unlock(&pers_lock);
kthread_stop(thread->tsk);
kfree(thread);
@@ -7318,8 +7353,7 @@ static void reap_sync_thread(mddev_t *mddev)
mdk_rdev_t *rdev;
/* resync has finished, collect result */
- md_unregister_thread(mddev->sync_thread);
- mddev->sync_thread = NULL;
+ md_unregister_thread(&mddev->sync_thread);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
diff --git a/drivers/md/md.h b/drivers/md/md.h
index bd47847cf7ca..1509a3eb9ae1 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -560,7 +560,7 @@ extern int register_md_personality(struct mdk_personality *p);
extern int unregister_md_personality(struct mdk_personality *p);
extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
mddev_t *mddev, const char *name);
-extern void md_unregister_thread(mdk_thread_t *thread);
+extern void md_unregister_thread(mdk_thread_t **threadp);
extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_check_recovery(mddev_t *mddev);
extern void md_write_start(mddev_t *mddev, struct bio *bi);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 407cb5691425..618dd9e22513 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -514,8 +514,7 @@ static int multipath_stop (mddev_t *mddev)
{
multipath_conf_t *conf = mddev->private;
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
mempool_destroy(conf->pool);
kfree(conf->multipaths);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 97f2a5f977b1..d4ddfa627301 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1099,12 +1099,11 @@ read_again:
bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
- r1_bio_write_done(r1_bio);
-
- /* In case raid1d snuck in to freeze_array */
- wake_up(&conf->wait_barrier);
-
+ /* Mustn't call r1_bio_write_done before this next test,
+ * as it could result in the bio being freed.
+ */
if (sectors_handled < (bio->bi_size >> 9)) {
+ r1_bio_write_done(r1_bio);
/* We need another r1_bio. It has already been counted
* in bio->bi_phys_segments
*/
@@ -1117,6 +1116,11 @@ read_again:
goto retry_write;
}
+ r1_bio_write_done(r1_bio);
+
+ /* In case raid1d snuck in to freeze_array */
+ wake_up(&conf->wait_barrier);
+
if (do_sync || !bitmap || !plugged)
md_wakeup_thread(mddev->thread);
}
@@ -2556,8 +2560,7 @@ static int stop(mddev_t *mddev)
raise_barrier(conf);
lower_barrier(conf);
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 04b625e1cb60..ea5fc0b6a84c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -337,6 +337,21 @@ static void close_write(r10bio_t *r10_bio)
md_write_end(r10_bio->mddev);
}
+static void one_write_done(r10bio_t *r10_bio)
+{
+ if (atomic_dec_and_test(&r10_bio->remaining)) {
+ if (test_bit(R10BIO_WriteError, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else {
+ close_write(r10_bio);
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+ reschedule_retry(r10_bio);
+ else
+ raid_end_bio_io(r10_bio);
+ }
+ }
+}
+
static void raid10_end_write_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -387,17 +402,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
* Let's see if all mirrored write operations have finished
* already.
*/
- if (atomic_dec_and_test(&r10_bio->remaining)) {
- if (test_bit(R10BIO_WriteError, &r10_bio->state))
- reschedule_retry(r10_bio);
- else {
- close_write(r10_bio);
- if (test_bit(R10BIO_MadeGood, &r10_bio->state))
- reschedule_retry(r10_bio);
- else
- raid_end_bio_io(r10_bio);
- }
- }
+ one_write_done(r10_bio);
if (dec_rdev)
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
}
@@ -1125,20 +1130,12 @@ retry_write:
spin_unlock_irqrestore(&conf->device_lock, flags);
}
- if (atomic_dec_and_test(&r10_bio->remaining)) {
- /* This matches the end of raid10_end_write_request() */
- bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
- r10_bio->sectors,
- !test_bit(R10BIO_Degraded, &r10_bio->state),
- 0);
- md_write_end(mddev);
- raid_end_bio_io(r10_bio);
- }
-
- /* In case raid10d snuck in to freeze_array */
- wake_up(&conf->wait_barrier);
+ /* Don't remove the bias on 'remaining' (one_write_done) until
+ * after checking if we need to go around again.
+ */
if (sectors_handled < (bio->bi_size >> 9)) {
+ one_write_done(r10_bio);
/* We need another r10_bio. It has already been counted
* in bio->bi_phys_segments.
*/
@@ -1152,6 +1149,10 @@ retry_write:
r10_bio->state = 0;
goto retry_write;
}
+ one_write_done(r10_bio);
+
+ /* In case raid10d snuck in to freeze_array */
+ wake_up(&conf->wait_barrier);
if (do_sync || !mddev->bitmap || !plugged)
md_wakeup_thread(mddev->thread);
@@ -2951,7 +2952,7 @@ static int run(mddev_t *mddev)
return 0;
out_free_conf:
- md_unregister_thread(mddev->thread);
+ md_unregister_thread(&mddev->thread);
if (conf->r10bio_pool)
mempool_destroy(conf->r10bio_pool);
safe_put_page(conf->tmppage);
@@ -2969,8 +2970,7 @@ static int stop(mddev_t *mddev)
raise_barrier(conf, 0);
lower_barrier(conf);
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
if (conf->r10bio_pool)
mempool_destroy(conf->r10bio_pool);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 96b7f6a1b6f2..83f2c44e170f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3336,7 +3336,7 @@ static void handle_stripe(struct stripe_head *sh)
finish:
/* wait for this device to become unblocked */
- if (unlikely(s.blocked_rdev))
+ if (conf->mddev->external && unlikely(s.blocked_rdev))
md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
if (s.handle_bad_blocks)
@@ -4939,8 +4939,7 @@ static int run(mddev_t *mddev)
return 0;
abort:
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
if (conf) {
print_raid5_conf(conf);
free_conf(conf);
@@ -4954,8 +4953,7 @@ static int stop(mddev_t *mddev)
{
raid5_conf_t *conf = mddev->private;
- md_unregister_thread(mddev->thread);
- mddev->thread = NULL;
+ md_unregister_thread(&mddev->thread);
if (mddev->queue)
mddev->queue->backing_dev_info.congested_fn = NULL;
free_conf(conf);