summaryrefslogtreecommitdiff
path: root/drivers/md/dm-thin.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r--drivers/md/dm-thin.c152
1 files changed, 137 insertions, 15 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index a1cc797fe88f..d52ea584e0bc 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -200,7 +200,13 @@ struct dm_thin_new_mapping;
enum pool_mode {
PM_WRITE, /* metadata may be changed */
PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */
+
+ /*
+ * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY.
+ */
+ PM_OUT_OF_METADATA_SPACE,
PM_READ_ONLY, /* metadata may not be changed */
+
PM_FAIL, /* all I/O fails */
};
@@ -250,6 +256,7 @@ struct pool {
spinlock_t lock;
struct bio_list deferred_flush_bios;
+ struct bio_list deferred_flush_completions;
struct list_head prepared_mappings;
struct list_head prepared_discards;
struct list_head active_thins;
@@ -914,6 +921,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
mempool_free(m, m->tc->pool->mapping_pool);
}
+static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
+{
+ struct pool *pool = tc->pool;
+ unsigned long flags;
+
+ /*
+ * If the bio has the REQ_FUA flag set we must commit the metadata
+ * before signaling its completion.
+ */
+ if (!bio_triggers_commit(tc, bio)) {
+ bio_endio(bio);
+ return;
+ }
+
+ /*
+ * Complete bio with an error if earlier I/O caused changes to the
+ * metadata that can't be committed, e.g, due to I/O errors on the
+ * metadata device.
+ */
+ if (dm_thin_aborted_changes(tc->td)) {
+ bio_io_error(bio);
+ return;
+ }
+
+ /*
+ * Batch together any bios that trigger commits and then issue a
+ * single commit for them in process_deferred_bios().
+ */
+ spin_lock_irqsave(&pool->lock, flags);
+ bio_list_add(&pool->deferred_flush_completions, bio);
+ spin_unlock_irqrestore(&pool->lock, flags);
+}
+
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
@@ -946,7 +986,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
*/
if (bio) {
inc_remap_and_issue_cell(tc, m->cell, m->data_block);
- bio_endio(bio);
+ complete_overwrite_bio(tc, bio);
} else {
inc_all_io_entry(tc->pool, m->cell->holder);
remap_and_issue(tc, m->cell->holder, m->data_block);
@@ -1299,7 +1339,37 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
-static void check_for_space(struct pool *pool)
+static void requeue_bios(struct pool *pool);
+
+static bool is_read_only_pool_mode(enum pool_mode mode)
+{
+ return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
+}
+
+static bool is_read_only(struct pool *pool)
+{
+ return is_read_only_pool_mode(get_pool_mode(pool));
+}
+
+static void check_for_metadata_space(struct pool *pool)
+{
+ int r;
+ const char *ooms_reason = NULL;
+ dm_block_t nr_free;
+
+ r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
+ if (r)
+ ooms_reason = "Could not get free metadata blocks";
+ else if (!nr_free)
+ ooms_reason = "No free metadata blocks";
+
+ if (ooms_reason && !is_read_only(pool)) {
+ DMERR("%s", ooms_reason);
+ set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
+ }
+}
+
+static void check_for_data_space(struct pool *pool)
{
int r;
dm_block_t nr_free;
@@ -1311,8 +1381,10 @@ static void check_for_space(struct pool *pool)
if (r)
return;
- if (nr_free)
+ if (nr_free) {
set_pool_mode(pool, PM_WRITE);
+ requeue_bios(pool);
+ }
}
/*
@@ -1323,14 +1395,16 @@ static int commit(struct pool *pool)
{
int r;
- if (get_pool_mode(pool) >= PM_READ_ONLY)
+ if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
return -EINVAL;
r = dm_pool_commit_metadata(pool->pmd);
if (r)
metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
- else
- check_for_space(pool);
+ else {
+ check_for_metadata_space(pool);
+ check_for_data_space(pool);
+ }
return r;
}
@@ -1389,10 +1463,26 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
r = dm_pool_alloc_data_block(pool->pmd, result);
if (r) {
- metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
+ if (r == -ENOSPC)
+ set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
+ else
+ metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
+ return r;
+ }
+
+ r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
+ if (r) {
+ metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
return r;
}
+ if (!free_blocks) {
+ /* Let's commit before we use up the metadata reserve. */
+ r = commit(pool);
+ if (r)
+ return r;
+ }
+
return 0;
}
@@ -1424,6 +1514,7 @@ static int should_error_unserviceable_bio(struct pool *pool)
case PM_OUT_OF_DATA_SPACE:
return pool->pf.error_if_no_space ? -ENOSPC : 0;
+ case PM_OUT_OF_METADATA_SPACE:
case PM_READ_ONLY:
case PM_FAIL:
return -EIO;
@@ -2171,7 +2262,7 @@ static void process_deferred_bios(struct pool *pool)
{
unsigned long flags;
struct bio *bio;
- struct bio_list bios;
+ struct bio_list bios, bio_completions;
struct thin_c *tc;
tc = get_first_thin(pool);
@@ -2182,26 +2273,36 @@ static void process_deferred_bios(struct pool *pool)
}
/*
- * If there are any deferred flush bios, we must commit
- * the metadata before issuing them.
+ * If there are any deferred flush bios, we must commit the metadata
+ * before issuing them or signaling their completion.
*/
bio_list_init(&bios);
+ bio_list_init(&bio_completions);
+
spin_lock_irqsave(&pool->lock, flags);
bio_list_merge(&bios, &pool->deferred_flush_bios);
bio_list_init(&pool->deferred_flush_bios);
+
+ bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
+ bio_list_init(&pool->deferred_flush_completions);
spin_unlock_irqrestore(&pool->lock, flags);
- if (bio_list_empty(&bios) &&
+ if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
return;
if (commit(pool)) {
+ bio_list_merge(&bios, &bio_completions);
+
while ((bio = bio_list_pop(&bios)))
bio_io_error(bio);
return;
}
pool->last_commit_jiffies = jiffies;
+ while ((bio = bio_list_pop(&bio_completions)))
+ bio_endio(bio);
+
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
@@ -2394,8 +2495,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
error_retry_list(pool);
break;
+ case PM_OUT_OF_METADATA_SPACE:
case PM_READ_ONLY:
- if (old_mode != new_mode)
+ if (!is_read_only_pool_mode(old_mode))
notify_of_pool_mode_change(pool, "read-only");
dm_pool_metadata_read_only(pool->pmd);
pool->process_bio = process_bio_read_only;
@@ -2827,6 +2929,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
spin_lock_init(&pool->lock);
bio_list_init(&pool->deferred_flush_bios);
+ bio_list_init(&pool->deferred_flush_completions);
INIT_LIST_HEAD(&pool->prepared_mappings);
INIT_LIST_HEAD(&pool->prepared_discards);
INIT_LIST_HEAD(&pool->active_thins);
@@ -3107,6 +3210,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
as.argc = argc;
as.argv = argv;
+ /* make sure metadata and data are different devices */
+ if (!strcmp(argv[0], argv[1])) {
+ ti->error = "Error setting metadata or data device";
+ r = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Set default pool features.
*/
@@ -3326,6 +3436,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
DMINFO("%s: growing the metadata device from %llu to %llu blocks",
dm_device_name(pool->pool_md),
sb_metadata_dev_size, metadata_dev_size);
+
+ if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
+ set_pool_mode(pool, PM_WRITE);
+
r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
if (r) {
metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
@@ -3629,7 +3743,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- if (get_pool_mode(pool) >= PM_READ_ONLY) {
+ if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
dm_device_name(pool->pool_md));
return -EOPNOTSUPP;
@@ -3703,6 +3817,7 @@ static void pool_status(struct dm_target *ti, status_type_t type,
dm_block_t nr_blocks_data;
dm_block_t nr_blocks_metadata;
dm_block_t held_root;
+ enum pool_mode mode;
char buf[BDEVNAME_SIZE];
char buf2[BDEVNAME_SIZE];
struct pool_c *pt = ti->private;
@@ -3773,9 +3888,10 @@ static void pool_status(struct dm_target *ti, status_type_t type,
else
DMEMIT("- ");
- if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
+ mode = get_pool_mode(pool);
+ if (mode == PM_OUT_OF_DATA_SPACE)
DMEMIT("out_of_data_space ");
- else if (pool->pf.mode == PM_READ_ONLY)
+ else if (is_read_only_pool_mode(mode))
DMEMIT("ro ");
else
DMEMIT("rw ");
@@ -3983,6 +4099,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
tc->sort_bio_list = RB_ROOT;
if (argc == 3) {
+ if (!strcmp(argv[0], argv[2])) {
+ ti->error = "Error setting origin device";
+ r = -EINVAL;
+ goto bad_origin_dev;
+ }
+
r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
if (r) {
ti->error = "Error opening origin device";