aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJamie Iles <jamie.iles@oracle.com>2013-02-21 10:18:51 +0000
committerWilly Tarreau <w@1wt.eu>2013-06-10 11:43:16 +0200
commit82c1ce5414718cfee2653c28fe7b9aa268fa0e48 (patch)
tree36218690a052689a79433fd9b96b226e6ade36e5 /fs
parent83c866120f3ae54640dd1fb37efb92c49cf5476d (diff)
CVE-2012-4508 kernel: ext4: AIO vs fallocate stale data exposure
CVE-2012-4508 kernel: ext4: AIO vs fallocate stale data exposure [dannf: backported to Debian's 2.6.32] According to Ben : > The original upstream commits were c278531d39f3158bfee93dc67da0b77e09776de2, > 60d4616f3dc63371b3dc367e5e88fd4b4f037f65 and (most importantly) > dee1f973ca341c266229faa5a1a5bb268bed3531 by Dmitry Monakhov > <dmonakhov@openvz.org>. They were backported into the RHEL 6 kernel by > Lukas Czerner, according to its changelog. Dann got this version from > Oracle's redpatch repository, where, if I understand rightly, Jamie Iles > attempted to regenerate Lukas's patch(es). Cc: Ben Hutchings <ben@decadent.org.uk> Signed-off-by: Jamie Iles <jamie@jamieiles.com> Signed-off-by: Willy Tarreau <w@1wt.eu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/extents.c67
1 files changed, 66 insertions, 1 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f4b471d13260..3f022eac581a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -62,6 +62,7 @@ ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
* idx_pblock:
* combine low and high parts of a leaf physical block number into ext4_fsblk_t
*/
+#define EXT4_EXT_DATA_VALID 0x8 /* extent contains valid data */
ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{
ext4_fsblk_t block;
@@ -2933,6 +2934,30 @@ static int ext4_split_unwritten_extents(handle_t *handle,
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
if (err == -ENOSPC && may_zeroout) {
+ /*
+ * This is different from the upstream, because we
+ * need only a flag to say that the extent contains
+ * the actual data.
+ *
+ * If the extent contains valid data, which can only
+ * happen if AIO races with fallocate, then we got
+ * here from ext4_convert_unwritten_extents_dio().
+ * So we have to be careful not to zeroout valid data
+ * in the extent.
+ *
+ * To avoid it, we only zeroout the ex3 and extend the
+ * extent which is going to become initialized to cover
+ * ex3 as well, and then continue as we would if only a
+ * split in two was required.
+ */
+ if (flags & EXT4_EXT_DATA_VALID) {
+ err = ext4_ext_zeroout(inode, ex3);
+ if (err)
+ goto fix_extent_len;
+ max_blocks = allocated;
+ ex2->ee_len = cpu_to_le16(max_blocks);
+ goto skip;
+ }
err = ext4_ext_zeroout(inode, &orig_ex);
if (err)
goto fix_extent_len;
@@ -2978,6 +3003,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
allocated = max_blocks;
}
+skip:
/*
* If there was a change of depth as part of the
* insertion of ex3 above, we need to update the length
@@ -3030,11 +3056,16 @@ fix_extent_len:
ext4_ext_dirty(handle, inode, path + depth);
return err;
}
+
static int ext4_convert_unwritten_extents_dio(handle_t *handle,
struct inode *inode,
+ ext4_lblk_t iblock,
+ unsigned int max_blocks,
struct ext4_ext_path *path)
{
struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ unsigned int ee_len;
struct ext4_extent_header *eh;
int depth;
int err = 0;
@@ -3043,6 +3074,30 @@ static int ext4_convert_unwritten_extents_dio(handle_t *handle,
depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+
+ ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
+ "block %llu, max_blocks %u\n", inode->i_ino,
+ (unsigned long long)ee_block, ee_len);
+
+ /* If extent is larger than requested then split is required */
+
+ if (ee_block != iblock || ee_len > max_blocks) {
+ err = ext4_split_unwritten_extents(handle, inode, path,
+ iblock, max_blocks,
+ EXT4_EXT_DATA_VALID);
+ if (err < 0)
+ goto out;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, iblock, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
+ }
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ }
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3129,7 +3184,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
/* async DIO end_io complete, convert the filled extent to written */
if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
ret = ext4_convert_unwritten_extents_dio(handle, inode,
- path);
+ iblock, max_blocks,
+ path);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
goto out2;
@@ -3498,6 +3554,12 @@ void ext4_ext_truncate(struct inode *inode)
int err = 0;
/*
+ * finish any pending end_io work so we won't run the risk of
+ * converting any truncated blocks to initialized later
+ */
+ flush_aio_dio_completed_IO(inode);
+
+ /*
* probably first extent we're gonna free will be last in block
*/
err = ext4_writepage_trans_blocks(inode);
@@ -3630,6 +3692,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
mutex_unlock(&inode->i_mutex);
return ret;
}
+
+ /* Flush completed AIO/DIO end_io work to prevent a race with unwritten extent conversion */
+ flush_aio_dio_completed_IO(inode);
retry:
while (ret >= 0 && ret < max_blocks) {
block = block + ret;