aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-integrity.c536
1 files changed, 503 insertions, 33 deletions
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index fb8935d80842..54b3fe1403a8 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -24,6 +24,7 @@
#define DEFAULT_INTERLEAVE_SECTORS 32768
#define DEFAULT_JOURNAL_SIZE_FACTOR 7
+#define DEFAULT_SECTORS_PER_BITMAP_BIT 32768
#define DEFAULT_BUFFER_SECTORS 128
#define DEFAULT_JOURNAL_WATERMARK 50
#define DEFAULT_SYNC_MSEC 10000
@@ -33,6 +34,8 @@
#define METADATA_WORKQUEUE_MAX_ACTIVE 16
#define RECALC_SECTORS 8192
#define RECALC_WRITE_SUPER 16
+#define BITMAP_BLOCK_SIZE 4096 /* don't change it */
+#define BITMAP_FLUSH_INTERVAL (10 * HZ)
/*
* Warning - DEBUG_PRINT prints security-sensitive data to the log,
@@ -48,6 +51,7 @@
#define SB_MAGIC "integrt"
#define SB_VERSION_1 1
#define SB_VERSION_2 2
+#define SB_VERSION_3 3
#define SB_SECTORS 8
#define MAX_SECTORS_PER_BLOCK 8
@@ -60,12 +64,14 @@ struct superblock {
__u64 provided_data_sectors; /* userspace uses this value */
__u32 flags;
__u8 log2_sectors_per_block;
- __u8 pad[3];
+ __u8 log2_blocks_per_bitmap_bit;
+ __u8 pad[2];
__u64 recalc_sector;
};
#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
#define SB_FLAG_RECALCULATING 0x2
+#define SB_FLAG_DIRTY_BITMAP 0x4
#define JOURNAL_ENTRY_ROUNDUP 8
@@ -155,9 +161,16 @@ struct dm_integrity_c {
struct workqueue_struct *metadata_wq;
struct superblock *sb;
unsigned journal_pages;
+ unsigned n_bitmap_blocks;
+
struct page_list *journal;
struct page_list *journal_io;
struct page_list *journal_xor;
+ struct page_list *recalc_bitmap;
+ struct page_list *may_write_bitmap;
+ struct bitmap_block_status *bbs;
+ unsigned bitmap_flush_interval;
+ struct delayed_work bitmap_flush_work;
struct crypto_skcipher *journal_crypt;
struct scatterlist **journal_scatterlist;
@@ -184,6 +197,7 @@ struct dm_integrity_c {
__s8 log2_metadata_run;
__u8 log2_buffer_sectors;
__u8 sectors_per_block;
+ __u8 log2_blocks_per_bitmap_bit;
unsigned char mode;
int suspending;
@@ -236,6 +250,7 @@ struct dm_integrity_c {
bool journal_uptodate;
bool just_formatted;
+ bool recalculate_flag;
struct alg_spec internal_hash_alg;
struct alg_spec journal_crypt_alg;
@@ -292,6 +307,16 @@ struct journal_io {
struct journal_completion *comp;
};
+struct bitmap_block_status {
+ struct work_struct work;
+ struct dm_integrity_c *ic;
+ unsigned idx;
+ unsigned long *bitmap;
+ struct bio_list bio_queue;
+ spinlock_t bio_queue_lock;
+
+};
+
static struct kmem_cache *journal_io_cache;
#define JOURNAL_IO_MEMPOOL 32
@@ -427,7 +452,9 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
static void sb_set_version(struct dm_integrity_c *ic)
{
- if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
+ if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
+ ic->sb->version = SB_VERSION_3;
+ else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
ic->sb->version = SB_VERSION_2;
else
ic->sb->version = SB_VERSION_1;
@@ -451,6 +478,135 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
return dm_io(&io_req, 1, &io_loc, NULL);
}
+#define BITMAP_OP_TEST_ALL_SET 0
+#define BITMAP_OP_TEST_ALL_CLEAR 1
+#define BITMAP_OP_SET 2
+#define BITMAP_OP_CLEAR 3
+
+static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap, sector_t sector, sector_t n_sectors, int mode)
+{
+ unsigned long bit, end_bit, this_end_bit, page, end_page;
+ unsigned long *data;
+
+ if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
+ DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)\n",
+ (unsigned long long)sector,
+ (unsigned long long)n_sectors,
+ ic->sb->log2_sectors_per_block,
+ ic->log2_blocks_per_bitmap_bit,
+ mode);
+ BUG();
+ }
+
+ if (unlikely(!n_sectors))
+ return true;
+
+ bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+ end_bit = (sector + n_sectors - 1) >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+
+ page = bit / (PAGE_SIZE * 8);
+ bit %= PAGE_SIZE * 8;
+
+ end_page = end_bit / (PAGE_SIZE * 8);
+ end_bit %= PAGE_SIZE * 8;
+
+repeat:
+ if (page < end_page) {
+ this_end_bit = PAGE_SIZE * 8 - 1;
+ } else {
+ this_end_bit = end_bit;
+ }
+
+ data = lowmem_page_address(bitmap[page].page);
+
+ if (mode == BITMAP_OP_TEST_ALL_SET) {
+ while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ if (data[bit / BITS_PER_LONG] != -1)
+ return false;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ if (!test_bit(bit, data))
+ return false;
+ bit++;
+ }
+ } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
+ while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ if (data[bit / BITS_PER_LONG] != 0)
+ return false;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ if (test_bit(bit, data))
+ return false;
+ bit++;
+ }
+ } else if (mode == BITMAP_OP_SET) {
+ while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ data[bit / BITS_PER_LONG] = -1;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ __set_bit(bit, data);
+ bit++;
+ }
+ } else if (mode == BITMAP_OP_CLEAR) {
+ if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
+ clear_page(data);
+ else while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ data[bit / BITS_PER_LONG] = 0;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ __clear_bit(bit, data);
+ bit++;
+ }
+ } else {
+ BUG();
+ }
+
+ if (unlikely(page < end_page)) {
+ bit = 0;
+ page++;
+ goto repeat;
+ }
+
+ return true;
+}
+
+static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
+{
+ unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
+ unsigned i;
+
+ for (i = 0; i < n_bitmap_pages; i++) {
+ unsigned long *dst_data = lowmem_page_address(dst[i].page);
+ unsigned long *src_data = lowmem_page_address(src[i].page);
+ copy_page(dst_data, src_data);
+ }
+}
+
+static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
+{
+ unsigned bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+ unsigned bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);
+
+ BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
+ return &ic->bbs[bitmap_block];
+}
+
static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
bool e, const char *function)
{
@@ -1784,6 +1940,20 @@ offload_to_thread:
goto journal_read_write;
}
+ if (ic->mode == 'B' && dio->write) {
+ if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
+ struct bitmap_block_status *bbs = sector_to_bitmap_block(ic, dio->range.logical_sector);
+
+ spin_lock(&bbs->bio_queue_lock);
+ bio_list_add(&bbs->bio_queue, bio);
+ spin_unlock(&bbs->bio_queue_lock);
+
+ queue_work(ic->writer_wq, &bbs->work);
+
+ return;
+ }
+ }
+
dio->in_flight = (atomic_t)ATOMIC_INIT(2);
if (need_sync_io) {
@@ -1810,10 +1980,14 @@ offload_to_thread:
if (need_sync_io) {
wait_for_completion_io(&read_comp);
- if (unlikely(ic->recalc_wq != NULL) &&
- ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
goto skip_check;
+ if (ic->mode == 'B') {
+ if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
+ goto skip_check;
+ }
+
if (likely(!bio->bi_status))
integrity_metadata(&dio->work);
else
@@ -1851,8 +2025,22 @@ static void pad_uncommitted(struct dm_integrity_c *ic)
wraparound_section(ic, &ic->free_section);
ic->n_uncommitted_sections++;
}
- WARN_ON(ic->journal_sections * ic->journal_section_entries !=
- (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors);
+ if (WARN_ON(ic->journal_sections * ic->journal_section_entries !=
+ (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors)) {
+ printk(KERN_CRIT "dm-integrity: "
+ "journal_sections %u, "
+ "journal_section_entries %u, "
+ "n_uncommitted_sections %u, "
+ "n_committed_sections %u, "
+ "journal_section_entries %u, "
+ "free_sectors %u\n",
+ ic->journal_sections,
+ ic->journal_section_entries,
+ ic->n_uncommitted_sections,
+ ic->n_committed_sections,
+ ic->journal_section_entries,
+ ic->free_sectors);
+ }
}
static void integrity_commit(struct work_struct *w)
@@ -2139,11 +2327,14 @@ static void integrity_recalc(struct work_struct *w)
sector_t area, offset;
sector_t metadata_block;
unsigned metadata_offset;
+ sector_t logical_sector, n_sectors;
__u8 *t;
unsigned i;
int r;
unsigned super_counter = 0;
+ DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector));
+
spin_lock_irq(&ic->endio_wait.lock);
next_chunk:
@@ -2152,8 +2343,13 @@ next_chunk:
goto unlock_ret;
range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
- if (unlikely(range.logical_sector >= ic->provided_data_sectors))
+ if (unlikely(range.logical_sector >= ic->provided_data_sectors)) {
+ if (ic->mode == 'B') {
+ DEBUG_print("queue_delayed_work: bitmap_flush_work\n");
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
+ }
goto unlock_ret;
+ }
get_area_and_offset(ic, range.logical_sector, &area, &offset);
range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector);
@@ -2161,11 +2357,33 @@ next_chunk:
range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
add_new_range_and_wait(ic, &range);
-
spin_unlock_irq(&ic->endio_wait.lock);
+ logical_sector = range.logical_sector;
+ n_sectors = range.n_sectors;
+
+ if (ic->mode == 'B') {
+ if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) {
+ goto advance_and_next;
+ }
+ while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
+ logical_sector += ic->sectors_per_block;
+ n_sectors -= ic->sectors_per_block;
+ cond_resched();
+ }
+ while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block, ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
+ n_sectors -= ic->sectors_per_block;
+ cond_resched();
+ }
+ get_area_and_offset(ic, logical_sector, &area, &offset);
+ }
+
+ DEBUG_print("recalculating: %lx, %lx\n", logical_sector, n_sectors);
if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
recalc_write_super(ic);
+ if (ic->mode == 'B') {
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
+ }
super_counter = 0;
}
@@ -2180,7 +2398,7 @@ next_chunk:
io_req.client = ic->io;
io_loc.bdev = ic->dev->bdev;
io_loc.sector = get_data_sector(ic, area, offset);
- io_loc.count = range.n_sectors;
+ io_loc.count = n_sectors;
r = dm_io(&io_req, 1, &io_loc, NULL);
if (unlikely(r)) {
@@ -2189,8 +2407,8 @@ next_chunk:
}
t = ic->recalc_tags;
- for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
- integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
+ for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
+ integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
t += ic->tag_size;
}
@@ -2202,6 +2420,9 @@ next_chunk:
goto err;
}
+advance_and_next:
+ cond_resched();
+
spin_lock_irq(&ic->endio_wait.lock);
remove_range_unlocked(ic, &range);
ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
@@ -2217,6 +2438,89 @@ unlock_ret:
recalc_write_super(ic);
}
+static void bitmap_block_work(struct work_struct *w)
+{
+ struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
+ struct dm_integrity_c *ic = bbs->ic;
+ struct bio *bio;
+ struct bio_list bio_queue;
+ struct bio_list waiting;
+
+ bio_list_init(&waiting);
+
+ spin_lock(&bbs->bio_queue_lock);
+ bio_queue = bbs->bio_queue;
+ bio_list_init(&bbs->bio_queue);
+ spin_unlock(&bbs->bio_queue_lock);
+
+ while ((bio = bio_list_pop(&bio_queue))) {
+ struct dm_integrity_io *dio;
+
+ dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
+
+ if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
+ remove_range(ic, &dio->range);
+ INIT_WORK(&dio->work, integrity_bio_wait);
+ queue_work(ic->wait_wq, &dio->work);
+ } else {
+ block_bitmap_op(ic, ic->journal, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_SET);
+ bio_list_add(&waiting, bio);
+ }
+ }
+
+ if (bio_list_empty(&waiting))
+ return;
+
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
+
+ while ((bio = bio_list_pop(&waiting))) {
+ struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
+
+ block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_SET);
+
+ remove_range(ic, &dio->range);
+ INIT_WORK(&dio->work, integrity_bio_wait);
+ queue_work(ic->wait_wq, &dio->work);
+ }
+
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
+}
+
+static void bitmap_flush_work(struct work_struct *work)
+{
+ struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work);
+ struct dm_integrity_range range;
+ unsigned long limit;
+
+ dm_integrity_flush_buffers(ic);
+
+ range.logical_sector = 0;
+ range.n_sectors = ic->provided_data_sectors;
+
+ spin_lock_irq(&ic->endio_wait.lock);
+ add_new_range_and_wait(ic, &range);
+ spin_unlock_irq(&ic->endio_wait.lock);
+
+ dm_integrity_flush_buffers(ic);
+ if (ic->meta_dev)
+ blkdev_issue_flush(ic->dev->bdev, GFP_NOIO, NULL);
+
+ limit = ic->provided_data_sectors;
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
+ limit = le64_to_cpu(ic->sb->recalc_sector)
+ >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)
+ << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+ }
+ DEBUG_print("zeroing journal\n");
+ block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
+
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+
+ remove_range(ic, &range);
+}
+
+
static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
unsigned n_sections, unsigned char commit_seq)
{
@@ -2416,6 +2720,7 @@ clear_journal:
static void dm_integrity_postsuspend(struct dm_target *ti)
{
struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
+ int r;
del_timer_sync(&ic->autocommit_timer);
@@ -2424,6 +2729,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
if (ic->recalc_wq)
drain_workqueue(ic->recalc_wq);
+ if (ic->mode == 'B')
+ cancel_delayed_work_sync(&ic->bitmap_flush_work);
+
queue_work(ic->commit_wq, &ic->commit_work);
drain_workqueue(ic->commit_wq);
@@ -2434,6 +2742,17 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
dm_integrity_flush_buffers(ic);
}
+ if (ic->mode == 'B') {
+ dm_integrity_flush_buffers(ic);
+#if 1
+ init_journal(ic, 0, ic->journal_sections, 0);
+ ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ if (unlikely(r))
+ dm_integrity_io_error(ic, "writing superblock", r);
+#endif
+ }
+
WRITE_ONCE(ic->suspending, 0);
BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
@@ -2444,11 +2763,65 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
static void dm_integrity_resume(struct dm_target *ti)
{
struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
+ int r;
+ DEBUG_print("resume\n");
+
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
+ DEBUG_print("resume dirty_bitmap\n");
+ rw_journal_sectors(ic, REQ_OP_READ, 0, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+ if (ic->mode == 'B') {
+ if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
+ block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal);
+ block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal);
+ if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) {
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
+ ic->sb->recalc_sector = cpu_to_le64(0);
+ }
+ } else {
+ DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n", ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit);
+ ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
+ ic->sb->recalc_sector = cpu_to_le64(0);
+ }
+ } else {
+ if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR))) {
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
+ ic->sb->recalc_sector = cpu_to_le64(0);
+ }
+ init_journal(ic, 0, ic->journal_sections, 0);
+ replay_journal(ic);
+ ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
+ }
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ if (unlikely(r))
+ dm_integrity_io_error(ic, "writing superblock", r);
+ } else {
+ replay_journal(ic);
+ if (ic->mode == 'B') {
+ int mode;
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
+ ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ if (unlikely(r))
+ dm_integrity_io_error(ic, "writing superblock", r);
+
+ mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR;
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode);
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode);
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+ }
+ }
- replay_journal(ic);
-
- if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
+ DEBUG_print("testing recalc: %x\n", ic->sb->flags);
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
__u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
+ DEBUG_print("recalc pos: %lx / %lx\n", (long)recalc_pos, ic->provided_data_sectors);
if (recalc_pos < ic->provided_data_sectors) {
queue_work(ic->recalc_wq, &ic->recalc_work);
} else if (recalc_pos > ic->provided_data_sectors) {
@@ -2486,6 +2859,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
arg_count += ic->mode == 'J';
arg_count += ic->mode == 'J';
+ arg_count += ic->mode == 'B';
+ arg_count += ic->mode == 'B';
arg_count += !!ic->internal_hash_alg.alg_string;
arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string;
@@ -2495,7 +2870,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" meta_device:%s", ic->meta_dev->name);
if (ic->sectors_per_block != 1)
DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
- if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
+ if (ic->recalculate_flag)
DMEMIT(" recalculate");
DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
@@ -2504,6 +2879,10 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
DMEMIT(" commit_time:%u", ic->autocommit_msec);
}
+ if (ic->mode == 'B') {
+ DMEMIT(" sectors_per_bit:%llu", (unsigned long long)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
+ DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
+ }
#define EMIT_ALG(a, n) \
do { \
@@ -3085,7 +3464,7 @@ bad:
* device
* offset from the start of the device
* tag size
- * D - direct writes, J - journal writes, R - recovery mode
+ * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode
* number of optional arguments
* optional arguments:
* journal_sectors
@@ -3095,6 +3474,8 @@ bad:
* commit_time
* meta_device
* block_size
+ * sectors_per_bit
+ * bitmap_flush_interval
* internal_hash
* journal_crypt
* journal_mac
@@ -3111,10 +3492,13 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
{0, 9, "Invalid number of feature args"},
};
unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
- bool recalculate;
bool should_write_sb;
__u64 threshold;
unsigned long long start;
+ __s8 log2_sectors_per_bitmap_bit = -1;
+ __s8 log2_blocks_per_bitmap_bit;
+ __u64 bits_in_journal;
+ __u64 n_bitmap_bits;
#define DIRECT_ARGUMENTS 4
@@ -3138,6 +3522,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
init_waitqueue_head(&ic->copy_to_journal_wait);
init_completion(&ic->crypto_backoff);
atomic64_set(&ic->number_of_mismatches, 0);
+ ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL;
r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
if (r) {
@@ -3160,10 +3545,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
}
- if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R"))
+ if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) {
ic->mode = argv[3][0];
- else {
- ti->error = "Invalid mode (expecting J, D, R)";
+ } else {
+ ti->error = "Invalid mode (expecting J, B, D, R)";
r = -EINVAL;
goto bad;
}
@@ -3173,7 +3558,6 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
buffer_sectors = DEFAULT_BUFFER_SECTORS;
journal_watermark = DEFAULT_JOURNAL_WATERMARK;
sync_msec = DEFAULT_SYNC_MSEC;
- recalculate = false;
ic->sectors_per_block = 1;
as.argc = argc - DIRECT_ARGUMENTS;
@@ -3185,6 +3569,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
while (extra_args--) {
const char *opt_string;
unsigned val;
+ unsigned long long llval;
opt_string = dm_shift_arg(&as);
if (!opt_string) {
r = -EINVAL;
@@ -3220,6 +3605,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
ic->sectors_per_block = val >> SECTOR_SHIFT;
+ } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
+ log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
+ } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
+ if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ r = -EINVAL;
+ ti->error = "Invalid bitmap_flush_interval argument";
+ }
+ ic->bitmap_flush_interval = msecs_to_jiffies(val);
} else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
"Invalid internal_hash argument");
@@ -3236,7 +3629,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (r)
goto bad;
} else if (!strcmp(opt_string, "recalculate")) {
- recalculate = true;
+ ic->recalculate_flag = true;
} else {
r = -EINVAL;
ti->error = "Invalid argument";
@@ -3287,6 +3680,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
else
ic->log2_tag_size = -1;
+ if (ic->mode == 'B' && !ic->internal_hash) {
+ r = -EINVAL;
+ ti->error = "Bitmap mode can be only used with internal hash";
+ goto bad;
+ }
+
ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
ic->autocommit_msec = sync_msec;
timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
@@ -3332,7 +3731,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
INIT_WORK(&ic->commit_work, integrity_commit);
- if (ic->mode == 'J') {
+ if (ic->mode == 'J' || ic->mode == 'B') {
ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
if (!ic->writer_wq) {
ti->error = "Cannot allocate workqueue";
@@ -3373,7 +3772,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
should_write_sb = true;
}
- if (!ic->sb->version || ic->sb->version > SB_VERSION_2) {
+ if (!ic->sb->version || ic->sb->version > SB_VERSION_3) {
r = -EINVAL;
ti->error = "Unknown version";
goto bad;
@@ -3433,6 +3832,27 @@ try_smaller_buffer:
ti->error = "The device is too small";
goto bad;
}
+
+ if (log2_sectors_per_bitmap_bit < 0)
+ log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT);
+ if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block)
+ log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block;
+
+ bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
+ if (bits_in_journal > UINT_MAX)
+ bits_in_journal = UINT_MAX;
+ while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
+ log2_sectors_per_bitmap_bit++;
+
+ log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
+ ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
+ if (should_write_sb) {
+ ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
+ }
+ n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block)
+ + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit;
+ ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8);
+
if (!ic->meta_dev)
ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
@@ -3457,25 +3877,21 @@ try_smaller_buffer:
DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
DEBUG_print(" journal_entries %u\n", ic->journal_entries);
DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
- DEBUG_print(" data_device_sectors 0x%llx\n", (unsigned long long)ic->data_device_sectors);
+ DEBUG_print(" data_device_sectors 0x%llx\n", i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT);
DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors,
(unsigned long long)ic->provided_data_sectors);
DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
+ DEBUG_print(" bits_in_journal %llu\n", (unsigned long long)bits_in_journal);
- if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
+ if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
ic->sb->recalc_sector = cpu_to_le64(0);
}
- if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
- if (!ic->internal_hash) {
- r = -EINVAL;
- ti->error = "Recalculate is only valid with internal hash";
- goto bad;
- }
+ if (ic->internal_hash) {
ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
if (!ic->recalc_wq ) {
ti->error = "Cannot allocate workqueue";
@@ -3512,6 +3928,45 @@ try_smaller_buffer:
r = create_journal(ic, &ti->error);
if (r)
goto bad;
+
+ }
+
+ if (ic->mode == 'B') {
+ unsigned i;
+ unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
+
+ ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
+ if (!ic->recalc_bitmap) {
+ r = -ENOMEM;
+ goto bad;
+ }
+ ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
+ if (!ic->may_write_bitmap) {
+ r = -ENOMEM;
+ goto bad;
+ }
+ ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
+ if (!ic->bbs) {
+ r = -ENOMEM;
+ goto bad;
+ }
+ INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work);
+ for (i = 0; i < ic->n_bitmap_blocks; i++) {
+ struct bitmap_block_status *bbs = &ic->bbs[i];
+ unsigned sector, pl_index, pl_offset;
+
+ INIT_WORK(&bbs->work, bitmap_block_work);
+ bbs->ic = ic;
+ bbs->idx = i;
+ bio_list_init(&bbs->bio_queue);
+ spin_lock_init(&bbs->bio_queue_lock);
+
+ sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT);
+ pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
+ pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
+
+ bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
+ }
}
if (should_write_sb) {
@@ -3536,6 +3991,17 @@ try_smaller_buffer:
if (r)
goto bad;
}
+ if (ic->mode == 'B') {
+ unsigned max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
+ if (!max_io_len)
+ max_io_len = 1U << 31;
+ DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
+ if (!ti->max_io_len || ti->max_io_len > max_io_len) {
+ r = dm_set_target_max_io_len(ti, max_io_len);
+ if (r)
+ goto bad;
+ }
+ }
if (!ic->internal_hash)
dm_integrity_set(ti, ic);
@@ -3544,6 +4010,7 @@ try_smaller_buffer:
ti->flush_supported = true;
return 0;
+
bad:
dm_integrity_dtr(ti);
return r;
@@ -3568,6 +4035,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
destroy_workqueue(ic->recalc_wq);
vfree(ic->recalc_buffer);
kvfree(ic->recalc_tags);
+ kvfree(ic->bbs);
if (ic->bufio)
dm_bufio_client_destroy(ic->bufio);
mempool_exit(&ic->journal_io_mempool);
@@ -3580,6 +4048,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
dm_integrity_free_page_list(ic->journal);
dm_integrity_free_page_list(ic->journal_io);
dm_integrity_free_page_list(ic->journal_xor);
+ dm_integrity_free_page_list(ic->recalc_bitmap);
+ dm_integrity_free_page_list(ic->may_write_bitmap);
if (ic->journal_scatterlist)
dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
if (ic->journal_io_scatterlist)
@@ -3617,7 +4087,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 2, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,