diff options
Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r-- | drivers/md/dm-thin.c | 152 |
1 files changed, 137 insertions, 15 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a1cc797fe88f..d52ea584e0bc 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -200,7 +200,13 @@ struct dm_thin_new_mapping; enum pool_mode { PM_WRITE, /* metadata may be changed */ PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */ + + /* + * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY. + */ + PM_OUT_OF_METADATA_SPACE, PM_READ_ONLY, /* metadata may not be changed */ + PM_FAIL, /* all I/O fails */ }; @@ -250,6 +256,7 @@ struct pool { spinlock_t lock; struct bio_list deferred_flush_bios; + struct bio_list deferred_flush_completions; struct list_head prepared_mappings; struct list_head prepared_discards; struct list_head active_thins; @@ -914,6 +921,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) mempool_free(m, m->tc->pool->mapping_pool); } +static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + /* + * If the bio has the REQ_FUA flag set we must commit the metadata + * before signaling its completion. + */ + if (!bio_triggers_commit(tc, bio)) { + bio_endio(bio); + return; + } + + /* + * Complete bio with an error if earlier I/O caused changes to the + * metadata that can't be committed, e.g, due to I/O errors on the + * metadata device. + */ + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_completions, bio); + spin_unlock_irqrestore(&pool->lock, flags); +} + static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -946,7 +986,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ if (bio) { inc_remap_and_issue_cell(tc, m->cell, m->data_block); - bio_endio(bio); + complete_overwrite_bio(tc, bio); } else { inc_all_io_entry(tc->pool, m->cell->holder); remap_and_issue(tc, m->cell->holder, m->data_block); @@ -1299,7 +1339,37 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); -static void check_for_space(struct pool *pool) +static void requeue_bios(struct pool *pool); + +static bool is_read_only_pool_mode(enum pool_mode mode) +{ + return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY); +} + +static bool is_read_only(struct pool *pool) +{ + return is_read_only_pool_mode(get_pool_mode(pool)); +} + +static void check_for_metadata_space(struct pool *pool) +{ + int r; + const char *ooms_reason = NULL; + dm_block_t nr_free; + + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free); + if (r) + ooms_reason = "Could not get free metadata blocks"; + else if (!nr_free) + ooms_reason = "No free metadata blocks"; + + if (ooms_reason && !is_read_only(pool)) { + DMERR("%s", ooms_reason); + set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE); + } +} + +static void check_for_data_space(struct pool *pool) { int r; dm_block_t nr_free; @@ -1311,8 +1381,10 @@ static void check_for_space(struct pool *pool) if (r) return; - if (nr_free) + if (nr_free) { set_pool_mode(pool, PM_WRITE); + requeue_bios(pool); + } } /* @@ -1323,14 +1395,16 @@ static int commit(struct pool *pool) { int r; - if (get_pool_mode(pool) >= PM_READ_ONLY) + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) return -EINVAL; r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); - else - check_for_space(pool); + else { + check_for_metadata_space(pool); + check_for_data_space(pool); + } return r; } @@ -1389,10 +1463,26 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) r = dm_pool_alloc_data_block(pool->pmd, result); if (r) { - metadata_operation_failed(pool, "dm_pool_alloc_data_block", r); + if (r == -ENOSPC) + set_pool_mode(pool, PM_OUT_OF_DATA_SPACE); + else + metadata_operation_failed(pool, "dm_pool_alloc_data_block", r); + return r; + } + + r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks); + if (r) { + metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r); return r; } + if (!free_blocks) { + /* Let's commit before we use up the metadata reserve. */ + r = commit(pool); + if (r) + return r; + } + return 0; } @@ -1424,6 +1514,7 @@ static int should_error_unserviceable_bio(struct pool *pool) case PM_OUT_OF_DATA_SPACE: return pool->pf.error_if_no_space ? -ENOSPC : 0; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: case PM_FAIL: return -EIO; @@ -2171,7 +2262,7 @@ static void process_deferred_bios(struct pool *pool) { unsigned long flags; struct bio *bio; - struct bio_list bios; + struct bio_list bios, bio_completions; struct thin_c *tc; tc = get_first_thin(pool); @@ -2182,26 +2273,36 @@ static void process_deferred_bios(struct pool *pool) } /* - * If there are any deferred flush bios, we must commit - * the metadata before issuing them. + * If there are any deferred flush bios, we must commit the metadata + * before issuing them or signaling their completion. */ bio_list_init(&bios); + bio_list_init(&bio_completions); + spin_lock_irqsave(&pool->lock, flags); bio_list_merge(&bios, &pool->deferred_flush_bios); bio_list_init(&pool->deferred_flush_bios); + + bio_list_merge(&bio_completions, &pool->deferred_flush_completions); + bio_list_init(&pool->deferred_flush_completions); spin_unlock_irqrestore(&pool->lock, flags); - if (bio_list_empty(&bios) && + if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) && !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool))) return; if (commit(pool)) { + bio_list_merge(&bios, &bio_completions); + while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; } pool->last_commit_jiffies = jiffies; + while ((bio = bio_list_pop(&bio_completions))) + bio_endio(bio); + while ((bio = bio_list_pop(&bios))) generic_make_request(bio); } @@ -2394,8 +2495,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) error_retry_list(pool); break; + case PM_OUT_OF_METADATA_SPACE: case PM_READ_ONLY: - if (old_mode != new_mode) + if (!is_read_only_pool_mode(old_mode)) notify_of_pool_mode_change(pool, "read-only"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_read_only; @@ -2827,6 +2929,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_flush_bios); + bio_list_init(&pool->deferred_flush_completions); INIT_LIST_HEAD(&pool->prepared_mappings); INIT_LIST_HEAD(&pool->prepared_discards); INIT_LIST_HEAD(&pool->active_thins); @@ -3107,6 +3210,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) as.argc = argc; as.argv = argv; + /* make sure metadata and data are different devices */ + if (!strcmp(argv[0], argv[1])) { + ti->error = "Error setting metadata or data device"; + r = -EINVAL; + goto out_unlock; + } + /* * Set default pool features. */ @@ -3326,6 +3436,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) DMINFO("%s: growing the metadata device from %llu to %llu blocks", dm_device_name(pool->pool_md), sb_metadata_dev_size, metadata_dev_size); + + if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE) + set_pool_mode(pool, PM_WRITE); + r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); if (r) { metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r); @@ -3629,7 +3743,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - if (get_pool_mode(pool) >= PM_READ_ONLY) { + if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) { DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", dm_device_name(pool->pool_md)); return -EOPNOTSUPP; @@ -3703,6 +3817,7 @@ static void pool_status(struct dm_target *ti, status_type_t type, dm_block_t nr_blocks_data; dm_block_t nr_blocks_metadata; dm_block_t held_root; + enum pool_mode mode; char buf[BDEVNAME_SIZE]; char buf2[BDEVNAME_SIZE]; struct pool_c *pt = ti->private; @@ -3773,9 +3888,10 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); - if (pool->pf.mode == PM_OUT_OF_DATA_SPACE) + mode = get_pool_mode(pool); + if (mode == PM_OUT_OF_DATA_SPACE) DMEMIT("out_of_data_space "); - else if (pool->pf.mode == PM_READ_ONLY) + else if (is_read_only_pool_mode(mode)) DMEMIT("ro "); else DMEMIT("rw "); @@ -3983,6 +4099,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) tc->sort_bio_list = RB_ROOT; if (argc == 3) { + if (!strcmp(argv[0], argv[2])) { + ti->error = "Error setting origin device"; + r = -EINVAL; + goto bad_origin_dev; + } + r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); if (r) { ti->error = "Error opening origin device"; |