diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/file.c | 16 | ||||
-rw-r--r-- | fs/ext4/inode.c | 45 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 1 |
3 files changed, 48 insertions, 14 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index dd65fac5ff2f..0d062ffacb24 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -192,15 +192,27 @@ out: } #ifdef CONFIG_FS_DAX +static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) +{ + struct inode *inode = bh->b_assoc_map->host; + /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ + loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; + int err; + if (!uptodate) + return; + WARN_ON(!buffer_unwritten(bh)); + err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); +} + static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_fault(vma, vmf, ext4_get_block); + return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten); /* Is this the right get_block? */ } static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext4_get_block); + return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten); } static const struct vm_operations_struct ext4_dax_vm_ops = { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3291e1af0e24..f43996884242 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -656,16 +656,32 @@ has_zeroout: return retval; } -static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) +/* + * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages + * we have to be careful as someone else may be manipulating b_state as well. + */ +static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) { - struct inode *inode = bh->b_assoc_map->host; - /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ - loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; - int err; - if (!uptodate) + unsigned long old_state; + unsigned long new_state; + + flags &= EXT4_MAP_FLAGS; + + /* Dummy buffer_head? Set non-atomically. */ + if (!bh->b_page) { + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; return; - WARN_ON(!buffer_unwritten(bh)); - err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); + } + /* + * Someone else may be modifying b_state. Be careful! This is ugly but + * once we get rid of using bh as a container for mapping information + * to pass to / from get_block functions, this can go away. + */ + do { + old_state = READ_ONCE(bh->b_state); + new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; + } while (unlikely( + cmpxchg(&bh->b_state, old_state, new_state) != old_state)); } /* Maximum number of blocks we map for direct IO at once. */ @@ -704,11 +720,16 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, ext4_io_end_t *io_end = ext4_inode_aio(inode); map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; - if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { + ext4_update_bh_state(bh, map.m_flags); + if (IS_DAX(inode) && buffer_unwritten(bh)) { + /* + * dgc: I suspect unwritten conversion on ext4+DAX is + * fundamentally broken here when there are concurrent + * read/write in progress on this inode. + */ + WARN_ON_ONCE(io_end); bh->b_assoc_map = inode->i_mapping; bh->b_private = (void *)(unsigned long)iblock; - bh->b_end_io = ext4_end_io_unwritten; } if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) set_buffer_defer_completion(bh); @@ -1655,7 +1676,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return ret; map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (buffer_unwritten(bh)) { /* A delayed write to unwritten bh should be marked diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 7da8ac1047f8..3fb92abe5707 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -393,6 +393,7 @@ data_copy: *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); if (*err < 0) break; + bh = bh->b_this_page; } if (!*err) *err = block_commit_write(pagep[0], from, from + replaced_size); |