diff options
Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r-- | drivers/md/dm-thin.c | 80 |
1 files changed, 70 insertions, 10 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index be70d38745f7..c1120eb96d86 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -25,6 +25,9 @@ #define MAPPING_POOL_SIZE 1024 #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ +#define NO_SPACE_TIMEOUT_SECS 60 + +static unsigned no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS; DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write"); @@ -173,6 +176,7 @@ struct pool { struct workqueue_struct *wq; struct work_struct worker; struct delayed_work waker; + struct delayed_work no_space_timeout; unsigned long last_commit_jiffies; unsigned ref_count; @@ -912,6 +916,24 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, } } +static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); + +static void check_for_space(struct pool *pool) +{ + int r; + dm_block_t nr_free; + + if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE) + return; + + r = dm_pool_get_free_block_count(pool->pmd, &nr_free); + if (r) + return; + + if (nr_free) + set_pool_mode(pool, PM_WRITE); +} + /* * A non-zero return indicates read_only or fail_io mode. * Many callers don't care about the return value. @@ -920,12 +942,14 @@ static int commit(struct pool *pool) { int r; - if (get_pool_mode(pool) != PM_WRITE) + if (get_pool_mode(pool) >= PM_READ_ONLY) return -EINVAL; r = dm_pool_commit_metadata(pool->pmd); if (r) metadata_operation_failed(pool, "dm_pool_commit_metadata", r); + else + check_for_space(pool); return r; } @@ -944,8 +968,6 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks) } } -static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); - static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; @@ -1392,9 +1414,9 @@ static void process_deferred_bios(struct pool *pool) */ if (ensure_next_mapping(pool)) { spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_bios, bio); bio_list_merge(&pool->deferred_bios, &bios); spin_unlock_irqrestore(&pool->lock, flags); - break; } @@ -1449,6 +1471,20 @@ static void do_waker(struct work_struct *ws) queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); } +/* + * We're holding onto IO to allow userland time to react. After the + * timeout either the pool will have been resized (and thus back in + * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO. + */ +static void do_no_space_timeout(struct work_struct *ws) +{ + struct pool *pool = container_of(to_delayed_work(ws), struct pool, + no_space_timeout); + + if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) + set_pool_mode(pool, PM_READ_ONLY); +} + /*----------------------------------------------------------------*/ struct noflush_work { @@ -1513,6 +1549,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) struct pool_c *pt = pool->ti->private; bool needs_check = dm_pool_metadata_needs_check(pool->pmd); enum pool_mode old_mode = get_pool_mode(pool); + unsigned long no_space_timeout = ACCESS_ONCE(no_space_timeout_secs) * HZ; /* * Never allow the pool to transition to PM_WRITE mode if user @@ -1573,7 +1610,10 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_bio = process_bio_read_only; pool->process_discard = process_discard; pool->process_prepared_mapping = process_prepared_mapping; - pool->process_prepared_discard = process_prepared_discard_passdown; + pool->process_prepared_discard = process_prepared_discard; + + if (!pool->pf.error_if_no_space && no_space_timeout) + queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout); break; case PM_WRITE: @@ -1682,6 +1722,14 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } + /* + * We must hold the virtual cell before doing the lookup, otherwise + * there's a race with discard. + */ + build_virtual_key(tc->td, block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) + return DM_MAPIO_SUBMITTED; + r = dm_thin_find_block(td, block, 0, &result); /* @@ -1705,13 +1753,10 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * shared flag will be set in their case. */ thin_defer_bio(tc, bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } - build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) - return DM_MAPIO_SUBMITTED; - build_data_key(tc->td, result.block, &key); if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) { cell_defer_no_holder_no_free(tc, &cell1); @@ -1732,6 +1777,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * of doing so. */ handle_unserviceable_bio(tc->pool, bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } /* fall through */ @@ -1742,6 +1788,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * provide the hint to load the metadata into cache. */ thin_defer_bio(tc, bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; default: @@ -1751,6 +1798,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * pool is switched to fail-io mode. */ bio_io_error(bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } } @@ -1956,6 +2004,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, INIT_WORK(&pool->worker, do_worker); INIT_DELAYED_WORK(&pool->waker, do_waker); + INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_bios); bio_list_init(&pool->deferred_flush_bios); @@ -2519,6 +2568,7 @@ static void pool_postsuspend(struct dm_target *ti) struct pool *pool = pt->pool; cancel_delayed_work(&pool->waker); + cancel_delayed_work(&pool->no_space_timeout); flush_workqueue(pool->wq); (void) commit(pool); } @@ -2694,6 +2744,12 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; + if (get_pool_mode(pool) >= PM_READ_ONLY) { + DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", + dm_device_name(pool->pool_md)); + return -EINVAL; + } + if (!strcasecmp(argv[0], "create_thin")) r = process_create_thin_mesg(argc, argv, pool); @@ -2901,7 +2957,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) */ if (pt->adjusted_pf.discard_passdown) { data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; - limits->discard_granularity = data_limits->discard_granularity; + limits->discard_granularity = max(data_limits->discard_granularity, + pool->sectors_per_block << SECTOR_SHIFT); } else limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } @@ -3305,6 +3362,9 @@ static void dm_thin_exit(void) module_init(dm_thin_init); module_exit(dm_thin_exit); +module_param_named(no_space_timeout, no_space_timeout_secs, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds"); + MODULE_DESCRIPTION(DM_NAME " thin provisioning target"); MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); MODULE_LICENSE("GPL"); |