diff options
author | Mark Brown <broonie@linaro.org> | 2014-02-14 12:15:46 +0000 |
---|---|---|
committer | Mark Brown <broonie@linaro.org> | 2014-02-14 12:15:46 +0000 |
commit | 8415e604452966f981f20cb4d8a8a30e38a772dd (patch) | |
tree | 0c111cf6c6ed487c9baee37275621097d57f5127 /drivers/md | |
parent | 4b9c8ddadce3ec70fa5d93875adeda861b21c84e (diff) | |
parent | 29b5f720990fafc302a034468455426dd662e101 (diff) | |
download | linaro-lsk-8415e604452966f981f20cb4d8a8a30e38a772dd.tar.gz |
Merge tag 'v3.10.30' into linux-linaro-lsk (refs: HEAD, linux-linaro-lsk-v3.10, linux-linaro-lsk)
This is the 3.10.30 stable release
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 4 | ||||
-rw-r--r-- | drivers/md/Makefile | 1 | ||||
-rw-r--r-- | drivers/md/dm-builtin.c | 48 | ||||
-rw-r--r-- | drivers/md/dm-sysfs.c | 5 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.c | 20 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.h | 2 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 31 | ||||
-rw-r--r-- | drivers/md/dm.c | 15 | ||||
-rw-r--r-- | drivers/md/dm.h | 17 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 6 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 28 |
11 files changed, 147 insertions, 30 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 3bfc8f1da9f..29cff90096a 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -176,8 +176,12 @@ config MD_FAULTY source "drivers/md/bcache/Kconfig" +config BLK_DEV_DM_BUILTIN + boolean + config BLK_DEV_DM tristate "Device mapper support" + select BLK_DEV_DM_BUILTIN ---help--- Device-mapper is a low level volume manager. It works by allowing people to specify mappings for ranges of logical sectors. Various diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 1439fd4ad9b..3591a729238 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BLK_DEV_MD) += md-mod.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o +obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o obj-$(CONFIG_DM_BUFIO) += dm-bufio.o obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o diff --git a/drivers/md/dm-builtin.c b/drivers/md/dm-builtin.c new file mode 100644 index 00000000000..6c9049c51b2 --- /dev/null +++ b/drivers/md/dm-builtin.c @@ -0,0 +1,48 @@ +#include "dm.h" + +/* + * The kobject release method must not be placed in the module itself, + * otherwise we are subject to module unload races. + * + * The release method is called when the last reference to the kobject is + * dropped. It may be called by any other kernel code that drops the last + * reference. + * + * The release method suffers from module unload race. We may prevent the + * module from being unloaded at the start of the release method (using + * increased module reference count or synchronizing against the release + * method), however there is no way to prevent the module from being + * unloaded at the end of the release method. + * + * If this code were placed in the dm module, the following race may + * happen: + * 1. Some other process takes a reference to dm kobject + * 2. The user issues ioctl function to unload the dm device + * 3. 
dm_sysfs_exit calls kobject_put, however the object is not released + * because of the other reference taken at step 1 + * 4. dm_sysfs_exit waits on the completion + * 5. The other process that took the reference in step 1 drops it, + * dm_kobject_release is called from this process + * 6. dm_kobject_release calls complete() + * 7. a reschedule happens before dm_kobject_release returns + * 8. dm_sysfs_exit continues, the dm device is unloaded, module reference + * count is decremented + * 9. The user unloads the dm module + * 10. The other process that was rescheduled in step 7 continues to run, + * it is now executing code in unloaded module, so it crashes + * + * Note that if the process that takes the foreign reference to dm kobject + * has a low priority and the system is sufficiently loaded with + * higher-priority processes that prevent the low-priority process from + * being scheduled long enough, this bug may really happen. + * + * In order to fix this module unload race, we place the release method + * into a helper code that is compiled directly into the kernel. 
+ */ + +void dm_kobject_release(struct kobject *kobj) +{ + complete(dm_get_completion_from_kobject(kobj)); +} + +EXPORT_SYMBOL(dm_kobject_release); diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index 84d2b91e4ef..c62c5ab6aed 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c @@ -86,6 +86,7 @@ static const struct sysfs_ops dm_sysfs_ops = { static struct kobj_type dm_ktype = { .sysfs_ops = &dm_sysfs_ops, .default_attrs = dm_attrs, + .release = dm_kobject_release, }; /* @@ -104,5 +105,7 @@ int dm_sysfs_init(struct mapped_device *md) */ void dm_sysfs_exit(struct mapped_device *md) { - kobject_put(dm_kobject(md)); + struct kobject *kobj = dm_kobject(md); + kobject_put(kobj); + wait_for_completion(dm_get_completion_from_kobject(kobj)); } diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 60bce435f4f..33ac3be2e83 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1349,6 +1349,12 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) return td->id; } +/* + * Check whether @time (of block creation) is older than @td's last snapshot. + * If so then the associated block is shared with the last snapshot device. + * Any block on a device created *after* the device last got snapshotted is + * necessarily not shared. 
+ */ static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) { return td->snapshotted_time > time; @@ -1458,6 +1464,20 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) return r; } +int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) +{ + int r; + uint32_t ref_count; + + down_read(&pmd->root_lock); + r = dm_sm_get_count(pmd->data_sm, b, &ref_count); + if (!r) + *result = (ref_count != 0); + up_read(&pmd->root_lock); + + return r; +} + bool dm_thin_changed_this_transaction(struct dm_thin_device *td) { int r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 845ebbe589a..125c0944401 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -181,6 +181,8 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result); int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); +int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); + /* * Returns -ENOSPC if the new size is too small and already allocated * blocks would be lost. 
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e36f81e282e..73c76c565a4 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -512,6 +512,7 @@ struct dm_thin_new_mapping { unsigned quiesced:1; unsigned prepared:1; unsigned pass_discard:1; + unsigned definitely_not_shared:1; struct thin_c *tc; dm_block_t virt_block; @@ -683,7 +684,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) cell_defer_no_holder(tc, m->cell2); if (m->pass_discard) - remap_and_issue(tc, m->bio, m->data_block); + if (m->definitely_not_shared) + remap_and_issue(tc, m->bio, m->data_block); + else { + bool used = false; + if (dm_pool_block_is_used(tc->pool->pmd, m->data_block, &used) || used) + bio_endio(m->bio, 0); + else + remap_and_issue(tc, m->bio, m->data_block); + } else bio_endio(m->bio, 0); @@ -751,13 +760,17 @@ static int ensure_next_mapping(struct pool *pool) static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) { - struct dm_thin_new_mapping *r = pool->next_mapping; + struct dm_thin_new_mapping *m = pool->next_mapping; BUG_ON(!pool->next_mapping); + memset(m, 0, sizeof(struct dm_thin_new_mapping)); + INIT_LIST_HEAD(&m->list); + m->bio = NULL; + pool->next_mapping = NULL; - return r; + return m; } static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, @@ -769,15 +782,10 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, struct pool *pool = tc->pool; struct dm_thin_new_mapping *m = get_next_mapping(pool); - INIT_LIST_HEAD(&m->list); - m->quiesced = 0; - m->prepared = 0; m->tc = tc; m->virt_block = virt_block; m->data_block = data_dest; m->cell = cell; - m->err = 0; - m->bio = NULL; if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) m->quiesced = 1; @@ -840,15 +848,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, struct pool *pool = tc->pool; struct dm_thin_new_mapping *m = get_next_mapping(pool); - INIT_LIST_HEAD(&m->list); m->quiesced = 1; 
m->prepared = 0; m->tc = tc; m->virt_block = virt_block; m->data_block = data_block; m->cell = cell; - m->err = 0; - m->bio = NULL; /* * If the whole block of data is being overwritten or we are not @@ -1032,12 +1037,12 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ m = get_next_mapping(pool); m->tc = tc; - m->pass_discard = (!lookup_result.shared) && pool->pf.discard_passdown; + m->pass_discard = pool->pf.discard_passdown; + m->definitely_not_shared = !lookup_result.shared; m->virt_block = block; m->data_block = lookup_result.block; m->cell = cell; m->cell2 = cell2; - m->err = 0; m->bio = bio; if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1c13071a81b..204a59fd872 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -184,8 +184,8 @@ struct mapped_device { /* forced geometry settings */ struct hd_geometry geometry; - /* sysfs handle */ - struct kobject kobj; + /* kobject and completion */ + struct dm_kobject_holder kobj_holder; /* zero-length flush that will be cloned and submitted to targets */ struct bio flush_bio; @@ -1904,6 +1904,7 @@ static struct mapped_device *alloc_dev(int minor) init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); + init_completion(&md->kobj_holder.completion); md->disk->major = _major; md->disk->first_minor = minor; @@ -2735,20 +2736,14 @@ struct gendisk *dm_disk(struct mapped_device *md) struct kobject *dm_kobject(struct mapped_device *md) { - return &md->kobj; + return &md->kobj_holder.kobj; } -/* - * struct mapped_device should not be exported outside of dm.c - * so use this check to verify that kobj is part of md structure - */ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) { struct mapped_device *md; - md = container_of(kobj, struct mapped_device, kobj); - if (&md->kobj != kobj) - return NULL; + md = container_of(kobj, struct mapped_device, kobj_holder.kobj); if 
(test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 45b97da1bd0..9b3222f4483 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -15,6 +15,8 @@ #include <linux/list.h> #include <linux/blkdev.h> #include <linux/hdreg.h> +#include <linux/completion.h> +#include <linux/kobject.h> /* * Suspend feature flags @@ -125,12 +127,27 @@ void dm_interface_exit(void); /* * sysfs interface */ +struct dm_kobject_holder { + struct kobject kobj; + struct completion completion; +}; + +static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) +{ + return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; +} + int dm_sysfs_init(struct mapped_device *md); void dm_sysfs_exit(struct mapped_device *md); struct kobject *dm_kobject(struct mapped_device *md); struct mapped_device *dm_get_from_kobject(struct kobject *kobj); /* + * The kobject helper + */ +void dm_kobject_release(struct kobject *kobj); + +/* * Targets for linear and striped mappings */ int dm_linear_init(void); diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 3e7a88d99eb..0d240373ffa 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) return -EINVAL; } + /* + * We need to set this before the dm_tm_new_block() call below. 
+ */ + ll->nr_blocks = nr_blocks; for (i = old_blocks; i < blocks; i++) { struct dm_block *b; struct disk_index_entry idx; @@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); if (r < 0) return r; + idx.blocknr = cpu_to_le64(dm_block_location(b)); r = dm_tm_unlock(ll->tm, b); @@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) return r; } - ll->nr_blocks = nr_blocks; return 0; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 58fc1eef749..afb419e514b 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -608,20 +608,38 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) * Flick into a mode where all blocks get allocated in the new area. */ smm->begin = old_len; - memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); + memcpy(sm, &bootstrap_ops, sizeof(*sm)); /* * Extend. */ r = sm_ll_extend(&smm->ll, extra_blocks); + if (r) + goto out; /* - * Switch back to normal behaviour. + * We repeatedly increment then commit until the commit doesn't + * allocate any new blocks. */ - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - for (i = old_len; !r && i < smm->begin; i++) - r = sm_ll_inc(&smm->ll, i, &ev); + do { + for (i = old_len; !r && i < smm->begin; i++) { + r = sm_ll_inc(&smm->ll, i, &ev); + if (r) + goto out; + } + old_len = smm->begin; + + r = sm_ll_commit(&smm->ll); + if (r) + goto out; + + } while (old_len != smm->begin); +out: + /* + * Switch back to normal behaviour. + */ + memcpy(sm, &ops, sizeof(*sm)); return r; } |