aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJulia Cartwright <julia@ni.com>2018-12-20 15:24:26 -0600
committerJulia Cartwright <julia@ni.com>2018-12-20 15:24:26 -0600
commita85a31922e531173f235d62fc36f09acb66763f4 (patch)
tree9c22031b92693d1f250f014073640f7e655301bc /fs
parent290f730d12b100534000f77bf1615b5428fa727a (diff)
parent1aa861ff238ecd17a3095b0dbd2d20bdf7bfaf14 (diff)
Merge tag 'v4.9.144' into v4.9-rt
This is the 4.9.144 stable release
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.h128
-rw-r--r--fs/btrfs/disk-io.c162
-rw-r--r--fs/btrfs/extent-tree.c86
-rw-r--r--fs/btrfs/extent_io.c24
-rw-r--r--fs/btrfs/extent_io.h19
-rw-r--r--fs/btrfs/free-space-cache.c2
-rw-r--r--fs/btrfs/struct-funcs.c9
-rw-r--r--fs/btrfs/tree-checker.c649
-rw-r--r--fs/btrfs/tree-checker.h38
-rw-r--r--fs/btrfs/volumes.c30
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/ceph/mds_client.c15
-rw-r--r--fs/f2fs/checkpoint.c46
-rw-r--r--fs/f2fs/data.c33
-rw-r--r--fs/f2fs/f2fs.h41
-rw-r--r--fs/f2fs/file.c21
-rw-r--r--fs/f2fs/inode.c68
-rw-r--r--fs/f2fs/node.c73
-rw-r--r--fs/f2fs/recovery.c6
-rw-r--r--fs/f2fs/segment.c34
-rw-r--r--fs/f2fs/segment.h48
-rw-r--r--fs/f2fs/super.c107
-rw-r--r--fs/hugetlbfs/inode.c30
-rw-r--r--fs/kernfs/symlink.c2
-rw-r--r--fs/udf/super.c16
-rw-r--r--fs/udf/unicode.c14
-rw-r--r--fs/xfs/libxfs/xfs_attr.c9
28 files changed, 1378 insertions, 336 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 128ce17a80b0..076ccfb44c28 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o hash.o free-space-tree.o
+ uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 86245b884fce..a423c36bcd72 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1415,7 +1415,7 @@ do { \
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
struct btrfs_map_token {
- struct extent_buffer *eb;
+ const struct extent_buffer *eb;
char *kaddr;
unsigned long offset;
};
@@ -1449,18 +1449,19 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token)
sizeof(((type *)0)->member)))
#define DECLARE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
- unsigned long off, \
- struct btrfs_map_token *token); \
-void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \
+u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
+ const void *ptr, unsigned long off, \
+ struct btrfs_map_token *token); \
+void btrfs_set_token_##bits(struct extent_buffer *eb, const void *ptr, \
unsigned long off, u##bits val, \
struct btrfs_map_token *token); \
-static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \
+static inline u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
+ const void *ptr, \
unsigned long off) \
{ \
return btrfs_get_token_##bits(eb, ptr, off, NULL); \
} \
-static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
+static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr,\
unsigned long off, u##bits val) \
{ \
btrfs_set_token_##bits(eb, ptr, off, val, NULL); \
@@ -1472,7 +1473,8 @@ DECLARE_BTRFS_SETGET_BITS(32)
DECLARE_BTRFS_SETGET_BITS(64)
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
+ const type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
@@ -1483,7 +1485,8 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
-static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \
+static inline u##bits btrfs_token_##name(const struct extent_buffer *eb,\
+ const type *s, \
struct btrfs_map_token *token) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
@@ -1498,9 +1501,9 @@ static inline void btrfs_set_token_##name(struct extent_buffer *eb, \
}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(struct extent_buffer *eb) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
{ \
- type *p = page_address(eb->pages[0]); \
+ const type *p = page_address(eb->pages[0]); \
u##bits res = le##bits##_to_cpu(p->member); \
return res; \
} \
@@ -1512,7 +1515,7 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \
}
#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(type *s) \
+static inline u##bits btrfs_##name(const type *s) \
{ \
return le##bits##_to_cpu(s->member); \
} \
@@ -1818,7 +1821,7 @@ static inline unsigned long btrfs_node_key_ptr_offset(int nr)
sizeof(struct btrfs_key_ptr) * nr;
}
-void btrfs_node_key(struct extent_buffer *eb,
+void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr);
static inline void btrfs_set_node_key(struct extent_buffer *eb,
@@ -1847,28 +1850,28 @@ static inline struct btrfs_item *btrfs_item_nr(int nr)
return (struct btrfs_item *)btrfs_item_nr_offset(nr);
}
-static inline u32 btrfs_item_end(struct extent_buffer *eb,
+static inline u32 btrfs_item_end(const struct extent_buffer *eb,
struct btrfs_item *item)
{
return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
}
-static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_end_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_end(eb, btrfs_item_nr(nr));
}
-static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_offset(eb, btrfs_item_nr(nr));
}
-static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+static inline u32 btrfs_item_size_nr(const struct extent_buffer *eb, int nr)
{
return btrfs_item_size(eb, btrfs_item_nr(nr));
}
-static inline void btrfs_item_key(struct extent_buffer *eb,
+static inline void btrfs_item_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
struct btrfs_item *item = btrfs_item_nr(nr);
@@ -1904,8 +1907,8 @@ BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item,
BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
transid, 64);
-static inline void btrfs_dir_item_key(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
+static inline void btrfs_dir_item_key(const struct extent_buffer *eb,
+ const struct btrfs_dir_item *item,
struct btrfs_disk_key *key)
{
read_eb_member(eb, item, struct btrfs_dir_item, location, key);
@@ -1913,7 +1916,7 @@ static inline void btrfs_dir_item_key(struct extent_buffer *eb,
static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
struct btrfs_dir_item *item,
- struct btrfs_disk_key *key)
+ const struct btrfs_disk_key *key)
{
write_eb_member(eb, item, struct btrfs_dir_item, location, key);
}
@@ -1925,8 +1928,8 @@ BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
generation, 64);
-static inline void btrfs_free_space_key(struct extent_buffer *eb,
- struct btrfs_free_space_header *h,
+static inline void btrfs_free_space_key(const struct extent_buffer *eb,
+ const struct btrfs_free_space_header *h,
struct btrfs_disk_key *key)
{
read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
@@ -1934,7 +1937,7 @@ static inline void btrfs_free_space_key(struct extent_buffer *eb,
static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
struct btrfs_free_space_header *h,
- struct btrfs_disk_key *key)
+ const struct btrfs_disk_key *key)
{
write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
}
@@ -1961,25 +1964,25 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
disk->objectid = cpu_to_le64(cpu->objectid);
}
-static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
+static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
{
struct btrfs_disk_key disk_key;
btrfs_node_key(eb, &disk_key, nr);
btrfs_disk_key_to_cpu(key, &disk_key);
}
-static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
+static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
{
struct btrfs_disk_key disk_key;
btrfs_item_key(eb, &disk_key, nr);
btrfs_disk_key_to_cpu(key, &disk_key);
}
-static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
- struct btrfs_key *key)
+static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb,
+ const struct btrfs_dir_item *item,
+ struct btrfs_key *key)
{
struct btrfs_disk_key disk_key;
btrfs_dir_item_key(eb, item, &disk_key);
@@ -2012,7 +2015,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header,
nritems, 32);
BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
-static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag)
{
return (btrfs_header_flags(eb) & flag) == flag;
}
@@ -2031,7 +2034,7 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
return (flags & flag) == flag;
}
-static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+static inline int btrfs_header_backref_rev(const struct extent_buffer *eb)
{
u64 flags = btrfs_header_flags(eb);
return flags >> BTRFS_BACKREF_REV_SHIFT;
@@ -2051,12 +2054,12 @@ static inline unsigned long btrfs_header_fsid(void)
return offsetof(struct btrfs_header, fsid);
}
-static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb)
{
return offsetof(struct btrfs_header, chunk_tree_uuid);
}
-static inline int btrfs_is_leaf(struct extent_buffer *eb)
+static inline int btrfs_is_leaf(const struct extent_buffer *eb)
{
return btrfs_header_level(eb) == 0;
}
@@ -2090,12 +2093,12 @@ BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
rtransid, 64);
-static inline bool btrfs_root_readonly(struct btrfs_root *root)
+static inline bool btrfs_root_readonly(const struct btrfs_root *root)
{
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
}
-static inline bool btrfs_root_dead(struct btrfs_root *root)
+static inline bool btrfs_root_dead(const struct btrfs_root *root)
{
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
}
@@ -2152,51 +2155,51 @@ BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
-static inline void btrfs_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_data(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
-static inline void btrfs_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_meta(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
-static inline void btrfs_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
+static inline void btrfs_balance_sys(const struct extent_buffer *eb,
+ const struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
+ struct btrfs_balance_item *bi,
+ const struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
- struct btrfs_disk_balance_args *disk)
+ const struct btrfs_disk_balance_args *disk)
{
memset(cpu, 0, sizeof(*cpu));
@@ -2216,7 +2219,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
static inline void
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
- struct btrfs_balance_args *cpu)
+ const struct btrfs_balance_args *cpu)
{
memset(disk, 0, sizeof(*disk));
@@ -2284,7 +2287,7 @@ BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
uuid_tree_generation, 64);
-static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
+static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
u16 t = btrfs_super_csum_type(s);
/*
@@ -2303,8 +2306,8 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
* this returns the address of the start of the last item,
* which is the stop of the leaf data stack
*/
-static inline unsigned int leaf_data_end(struct btrfs_root *root,
- struct extent_buffer *leaf)
+static inline unsigned int leaf_data_end(const struct btrfs_root *root,
+ const struct extent_buffer *leaf)
{
u32 nr = btrfs_header_nritems(leaf);
@@ -2329,7 +2332,7 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
struct btrfs_file_extent_item, compression, 8);
static inline unsigned long
-btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
+btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e)
{
return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
@@ -2363,8 +2366,9 @@ BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
* size of any extent headers. If a file is compressed on disk, this is
* the compressed size
*/
-static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
- struct btrfs_item *e)
+static inline u32 btrfs_file_extent_inline_item_len(
+ const struct extent_buffer *eb,
+ struct btrfs_item *e)
{
return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
}
@@ -2372,9 +2376,9 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
/* this returns the number of file bytes represented by the inline item.
* If an item is compressed, this is the uncompressed size
*/
-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
- int slot,
- struct btrfs_file_extent_item *fi)
+static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
+ int slot,
+ const struct btrfs_file_extent_item *fi)
{
struct btrfs_map_token token;
@@ -2396,8 +2400,8 @@ static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
/* btrfs_dev_stats_item */
-static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
- struct btrfs_dev_stats_item *ptr,
+static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
+ const struct btrfs_dev_stats_item *ptr,
int index)
{
u64 val;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 57d375c68e46..77b32415d9f2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,6 +50,7 @@
#include "sysfs.h"
#include "qgroup.h"
#include "compression.h"
+#include "tree-checker.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -452,9 +453,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
int mirror_num = 0;
int failed_mirror = 0;
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
while (1) {
+ clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
btree_get_extent, mirror_num);
if (!ret) {
@@ -465,14 +466,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
ret = -EIO;
}
- /*
- * This buffer's crc is fine, but its contents are corrupted, so
- * there is no reason to read the other copies, they won't be
- * any less wrong.
- */
- if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
- break;
-
num_copies = btrfs_num_copies(root->fs_info,
eb->start, eb->len);
if (num_copies == 1)
@@ -546,145 +539,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
return ret;
}
-#define CORRUPT(reason, eb, root, slot) \
- btrfs_crit(root->fs_info, "corrupt %s, %s: block=%llu," \
- " root=%llu, slot=%d", \
- btrfs_header_level(eb) == 0 ? "leaf" : "node",\
- reason, btrfs_header_bytenr(eb), root->objectid, slot)
-
-static noinline int check_leaf(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- struct btrfs_key key;
- struct btrfs_key leaf_key;
- u32 nritems = btrfs_header_nritems(leaf);
- int slot;
-
- /*
- * Extent buffers from a relocation tree have a owner field that
- * corresponds to the subvolume tree they are based on. So just from an
- * extent buffer alone we can not find out what is the id of the
- * corresponding subvolume tree, so we can not figure out if the extent
- * buffer corresponds to the root of the relocation tree or not. So skip
- * this check for relocation trees.
- */
- if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
- struct btrfs_root *check_root;
-
- key.objectid = btrfs_header_owner(leaf);
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- check_root = btrfs_get_fs_root(root->fs_info, &key, false);
- /*
- * The only reason we also check NULL here is that during
- * open_ctree() some roots has not yet been set up.
- */
- if (!IS_ERR_OR_NULL(check_root)) {
- struct extent_buffer *eb;
-
- eb = btrfs_root_node(check_root);
- /* if leaf is the root, then it's fine */
- if (leaf != eb) {
- CORRUPT("non-root leaf's nritems is 0",
- leaf, check_root, 0);
- free_extent_buffer(eb);
- return -EIO;
- }
- free_extent_buffer(eb);
- }
- return 0;
- }
-
- if (nritems == 0)
- return 0;
-
- /* Check the 0 item */
- if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("invalid item offset size pair", leaf, root, 0);
- return -EIO;
- }
-
- /*
- * Check to make sure each items keys are in the correct order and their
- * offsets make sense. We only have to loop through nritems-1 because
- * we check the current slot against the next slot, which verifies the
- * next slot's offset+size makes sense and that the current's slot
- * offset is correct.
- */
- for (slot = 0; slot < nritems - 1; slot++) {
- btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
- btrfs_item_key_to_cpu(leaf, &key, slot + 1);
-
- /* Make sure the keys are in the right order */
- if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
- CORRUPT("bad key order", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Make sure the offset and ends are right, remember that the
- * item data starts at the end of the leaf and grows towards the
- * front.
- */
- if (btrfs_item_offset_nr(leaf, slot) !=
- btrfs_item_end_nr(leaf, slot + 1)) {
- CORRUPT("slot offset bad", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Check to make sure that we don't point outside of the leaf,
- * just in case all the items are consistent to each other, but
- * all point outside of the leaf.
- */
- if (btrfs_item_end_nr(leaf, slot) >
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("slot end outside of leaf", leaf, root, slot);
- return -EIO;
- }
- }
-
- return 0;
-}
-
-static int check_node(struct btrfs_root *root, struct extent_buffer *node)
-{
- unsigned long nr = btrfs_header_nritems(node);
- struct btrfs_key key, next_key;
- int slot;
- u64 bytenr;
- int ret = 0;
-
- if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
- btrfs_crit(root->fs_info,
- "corrupt node: block %llu root %llu nritems %lu",
- node->start, root->objectid, nr);
- return -EIO;
- }
-
- for (slot = 0; slot < nr - 1; slot++) {
- bytenr = btrfs_node_blockptr(node, slot);
- btrfs_node_key_to_cpu(node, &key, slot);
- btrfs_node_key_to_cpu(node, &next_key, slot + 1);
-
- if (!bytenr) {
- CORRUPT("invalid item slot", node, root, slot);
- ret = -EIO;
- goto out;
- }
-
- if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
- CORRUPT("bad key order", node, root, slot);
- ret = -EIO;
- goto out;
- }
- }
-out:
- return ret;
-}
-
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
@@ -750,12 +604,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
- if (found_level == 0 && check_leaf(root, eb)) {
+ if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
- if (found_level > 0 && check_node(root, eb))
+ if (found_level > 0 && btrfs_check_node(root, eb))
ret = -EIO;
if (!ret)
@@ -4086,7 +3940,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->len,
root->fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
+ /*
+ * Since btrfs_mark_buffer_dirty() can be called with item pointer set
+ * but item data not updated.
+ * So here we should only check item pointers, not item data.
+ */
+ if (btrfs_header_level(buf) == 0 &&
+ btrfs_check_leaf_relaxed(root, buf)) {
btrfs_print_leaf(root, buf);
ASSERT(0);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a775307f3b6b..7938c48c72ff 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9896,6 +9896,8 @@ static int find_first_block_group(struct btrfs_root *root,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+ struct btrfs_block_group_item bg;
+ u64 flags;
int slot;
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9930,8 +9932,32 @@ static int find_first_block_group(struct btrfs_root *root,
"logical %llu len %llu found bg but no related chunk",
found_key.objectid, found_key.offset);
ret = -ENOENT;
+ } else if (em->start != found_key.objectid ||
+ em->len != found_key.offset) {
+ btrfs_err(root->fs_info,
+ "block group %llu len %llu mismatch with chunk %llu len %llu",
+ found_key.objectid, found_key.offset,
+ em->start, em->len);
+ ret = -EUCLEAN;
} else {
- ret = 0;
+ read_extent_buffer(leaf, &bg,
+ btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bg));
+ flags = btrfs_block_group_flags(&bg) &
+ BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+ if (flags != (em->map_lookup->type &
+ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(root->fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
+ found_key.objectid,
+ found_key.offset, flags,
+ (BTRFS_BLOCK_GROUP_TYPE_MASK &
+ em->map_lookup->type));
+ ret = -EUCLEAN;
+ } else {
+ ret = 0;
+ }
}
free_extent_map(em);
goto out;
@@ -10159,6 +10185,62 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
return cache;
}
+
+/*
+ * Iterate all chunks and verify that each of them has the corresponding block
+ * group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct extent_map *em;
+ struct btrfs_block_group_cache *bg;
+ u64 start = 0;
+ int ret = 0;
+
+ while (1) {
+ read_lock(&map_tree->map_tree.lock);
+ /*
+ * lookup_extent_mapping will return the first extent map
+ * intersecting the range, so setting @len to 1 is enough to
+ * get the first chunk.
+ */
+ em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+ read_unlock(&map_tree->map_tree.lock);
+ if (!em)
+ break;
+
+ bg = btrfs_lookup_block_group(fs_info, em->start);
+ if (!bg) {
+ btrfs_err(fs_info,
+ "chunk start=%llu len=%llu doesn't have corresponding block group",
+ em->start, em->len);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ break;
+ }
+ if (bg->key.objectid != em->start ||
+ bg->key.offset != em->len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
+ em->start, em->len,
+ em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
+ bg->key.objectid, bg->key.offset,
+ bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+ ret = -EUCLEAN;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ break;
+ }
+ start = em->start + em->len;
+ free_extent_map(em);
+ btrfs_put_block_group(bg);
+ }
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_root *root)
{
struct btrfs_path *path;
@@ -10343,7 +10425,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
}
init_global_block_rsv(info);
- ret = 0;
+ ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5feaef9bcbda..793d4d571d8d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5442,9 +5442,8 @@ unlock_exit:
return ret;
}
-void read_extent_buffer(struct extent_buffer *eb, void *dstv,
- unsigned long start,
- unsigned long len)
+void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5473,9 +5472,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
- unsigned long start,
- unsigned long len)
+int read_extent_buffer_to_user(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5515,10 +5514,10 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
* return 1 if the item spans two pages.
* return -EINVAL otherwise.
*/
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len)
+int map_private_extent_buffer(const struct extent_buffer *eb,
+ unsigned long start, unsigned long min_len,
+ char **map, unsigned long *map_start,
+ unsigned long *map_len)
{
size_t offset = start & (PAGE_SIZE - 1);
char *kaddr;
@@ -5552,9 +5551,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
return 0;
}
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len)
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index ab31d145227e..9ecdc9584df7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -396,14 +396,13 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
atomic_inc(&eb->refs);
}
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len);
-void read_extent_buffer(struct extent_buffer *eb, void *dst,
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len);
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst,
- unsigned long start,
+int read_extent_buffer_to_user(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
unsigned long len);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
@@ -428,10 +427,10 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len);
+int map_private_extent_buffer(const struct extent_buffer *eb,
+ unsigned long offset, unsigned long min_len,
+ char **map, unsigned long *map_start,
+ unsigned long *map_len);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5ca0dbb9074d..69a3c11af9d4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2464,6 +2464,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
struct rb_node *n;
int count = 0;
+ spin_lock(&ctl->tree_lock);
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes && !block_group->ro)
@@ -2473,6 +2474,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
info->offset, info->bytes,
(info->bitmap) ? "yes" : "no");
}
+ spin_unlock(&ctl->tree_lock);
btrfs_info(block_group->fs_info, "block group has cluster?: %s",
list_empty(&block_group->cluster_list) ? "no" : "yes");
btrfs_info(block_group->fs_info,
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index 875c757e73e2..5e2b92d83617 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,8 +50,8 @@ static inline void put_unaligned_le8(u8 val, void *p)
*/
#define DEFINE_BTRFS_SETGET_BITS(bits) \
-u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
- unsigned long off, \
+u##bits btrfs_get_token_##bits(const struct extent_buffer *eb, \
+ const void *ptr, unsigned long off, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
@@ -90,7 +90,8 @@ u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
return res; \
} \
void btrfs_set_token_##bits(struct extent_buffer *eb, \
- void *ptr, unsigned long off, u##bits val, \
+ const void *ptr, unsigned long off, \
+ u##bits val, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)ptr; \
@@ -133,7 +134,7 @@ DEFINE_BTRFS_SETGET_BITS(16)
DEFINE_BTRFS_SETGET_BITS(32)
DEFINE_BTRFS_SETGET_BITS(64)
-void btrfs_node_key(struct extent_buffer *eb,
+void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
unsigned long ptr = btrfs_node_key_ptr_offset(nr);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
new file mode 100644
index 000000000000..7b69ba78e600
--- /dev/null
+++ b/fs/btrfs/tree-checker.c
@@ -0,0 +1,649 @@
+/*
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program.
+ */
+
+/*
+ * The module is used to catch unexpected/corrupted tree block data.
+ * Such behavior can be caused either by a fuzzed image or bugs.
+ *
+ * The objective is to do leaf/node validation checks when tree block is read
+ * from disk, and check *every* possible member, so other code won't
+ * need to checking them again.
+ *
+ * Due to the potential and unwanted damage, every checker needs to be
+ * carefully reviewed otherwise so it does not prevent mount of valid images.
+ */
+
+#include "ctree.h"
+#include "tree-checker.h"
+#include "disk-io.h"
+#include "compression.h"
+#include "hash.h"
+#include "volumes.h"
+
+#define CORRUPT(reason, eb, root, slot) \
+ btrfs_crit(root->fs_info, \
+ "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", \
+ reason, btrfs_header_bytenr(eb), root->objectid, slot)
+
+/*
+ * Error message should follow the following format:
+ * corrupt <type>: <identifier>, <reason>[, <bad_value>]
+ *
+ * @type: leaf or node
+ * @identifier: the necessary info to locate the leaf/node.
+ * It's recommened to decode key.objecitd/offset if it's
+ * meaningful.
+ * @reason: describe the error
+ * @bad_value: optional, it's recommened to output bad value and its
+ * expected value (range).
+ *
+ * Since comma is used to separate the components, only space is allowed
+ * inside each component.
+ */
+
+/*
+ * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
+ * Allows callers to customize the output.
+ */
+__printf(4, 5)
+static void generic_err(const struct btrfs_root *root,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(root->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
+ va_end(args);
+}
+
+static int check_extent_data_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_file_extent_item *fi;
+ u32 sectorsize = root->sectorsize;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
+ CORRUPT("unaligned key offset for file extent",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
+ CORRUPT("invalid file extent type", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Support for new compression/encrption must introduce incompat flag,
+ * and must be caught in open_ctree().
+ */
+ if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
+ CORRUPT("invalid file extent compression", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (btrfs_file_extent_encryption(leaf, fi)) {
+ CORRUPT("invalid file extent encryption", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
+ /* Inline extent must have 0 as key offset */
+ if (key->offset) {
+ CORRUPT("inline extent has non-zero key offset",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /* Compressed inline extent has no on-disk size, skip it */
+ if (btrfs_file_extent_compression(leaf, fi) !=
+ BTRFS_COMPRESS_NONE)
+ return 0;
+
+ /* Uncompressed inline extent size must match item size */
+ if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
+ btrfs_file_extent_ram_bytes(leaf, fi)) {
+ CORRUPT("plaintext inline extent has invalid size",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+ return 0;
+ }
+
+ /* Regular or preallocated extent has fixed item size */
+ if (item_size != sizeof(*fi)) {
+ CORRUPT(
+ "regluar or preallocated extent data item size is invalid",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(btrfs_file_extent_ram_bytes(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_disk_bytenr(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_disk_num_bytes(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_offset(leaf, fi), sectorsize) ||
+ !IS_ALIGNED(btrfs_file_extent_num_bytes(leaf, fi), sectorsize)) {
+ CORRUPT(
+ "regular or preallocated extent data item has unaligned value",
+ leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ return 0;
+}
+
+static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ u32 sectorsize = root->sectorsize;
+ u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
+
+ if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
+ CORRUPT("invalid objectid for csum item", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(key->offset, sectorsize)) {
+ CORRUPT("unaligned key offset for csum item", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
+ CORRUPT("unaligned csum item size", leaf, root, slot);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
+/*
+ * Customized reported for dir_item, only important new info is key->objectid,
+ * which represents inode number
+ */
+__printf(4, 5)
+static void dir_item_err(const struct btrfs_root *root,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(root->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
+ btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
+ va_end(args);
+}
+
+static int check_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_dir_item *di;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u32 cur = 0;
+
+ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+ while (cur < item_size) {
+ u32 name_len;
+ u32 data_len;
+ u32 max_name_len;
+ u32 total_size;
+ u32 name_hash;
+ u8 dir_type;
+
+ /* header itself should not cross item boundary */
+ if (cur + sizeof(*di) > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item header crosses item boundary, have %zu boundary %u",
+ cur + sizeof(*di), item_size);
+ return -EUCLEAN;
+ }
+
+ /* dir type check */
+ dir_type = btrfs_dir_type(leaf, di);
+ if (dir_type >= BTRFS_FT_MAX) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type, have %u expect [0, %u)",
+ dir_type, BTRFS_FT_MAX);
+ return -EUCLEAN;
+ }
+
+ if (key->type == BTRFS_XATTR_ITEM_KEY &&
+ dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type for XATTR key, have %u expect %u",
+ dir_type, BTRFS_FT_XATTR);
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR &&
+ key->type != BTRFS_XATTR_ITEM_KEY) {
+ dir_item_err(root, leaf, slot,
+ "xattr dir type found for non-XATTR key");
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR)
+ max_name_len = XATTR_NAME_MAX;
+ else
+ max_name_len = BTRFS_NAME_LEN;
+
+ /* Name/data length check */
+ name_len = btrfs_dir_name_len(leaf, di);
+ data_len = btrfs_dir_data_len(leaf, di);
+ if (name_len > max_name_len) {
+ dir_item_err(root, leaf, slot,
+ "dir item name len too long, have %u max %u",
+ name_len, max_name_len);
+ return -EUCLEAN;
+ }
+ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
+ dir_item_err(root, leaf, slot,
+ "dir item name and data len too long, have %u max %u",
+ name_len + data_len,
+ BTRFS_MAX_XATTR_SIZE(root));
+ return -EUCLEAN;
+ }
+
+ if (data_len && dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "dir item with invalid data len, have %u expect 0",
+ data_len);
+ return -EUCLEAN;
+ }
+
+ total_size = sizeof(*di) + name_len + data_len;
+
+ /* header and name/data should not cross item boundary */
+ if (cur + total_size > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item data crosses item boundary, have %u boundary %u",
+ cur + total_size, item_size);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Special check for XATTR/DIR_ITEM, as key->offset is name
+ * hash, should match its name
+ */
+ if (key->type == BTRFS_DIR_ITEM_KEY ||
+ key->type == BTRFS_XATTR_ITEM_KEY) {
+ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
+
+ read_extent_buffer(leaf, namebuf,
+ (unsigned long)(di + 1), name_len);
+ name_hash = btrfs_name_hash(namebuf, name_len);
+ if (key->offset != name_hash) {
+ dir_item_err(root, leaf, slot,
+ "name hash mismatch with key, have 0x%016x expect 0x%016llx",
+ name_hash, key->offset);
+ return -EUCLEAN;
+ }
+ }
+ cur += total_size;
+ di = (struct btrfs_dir_item *)((void *)di + total_size);
+ }
+ return 0;
+}
+
+__printf(4, 5)
+__cold
+static void block_group_err(const struct btrfs_fs_info *fs_info,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+ key.objectid, key.offset, &vaf);
+ va_end(args);
+}
+
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_block_group_item bgi;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u64 flags;
+ u64 type;
+
+ /*
+ * Here we don't really care about alignment since extent allocator can
+ * handle it. We care more about the size, as if one block group is
+ * larger than maximum size, it's must be some obvious corruption.
+ */
+ if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group size, have %llu expect (0, %llu]",
+ key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+ return -EUCLEAN;
+ }
+
+ if (item_size != sizeof(bgi)) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid item size, have %u expect %zu",
+ item_size, sizeof(bgi));
+ return -EUCLEAN;
+ }
+
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ if (btrfs_block_group_chunk_objectid(&bgi) !=
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group chunk objectid, have %llu expect %llu",
+ btrfs_block_group_chunk_objectid(&bgi),
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ return -EUCLEAN;
+ }
+
+ if (btrfs_block_group_used(&bgi) > key->offset) {
+ block_group_err(fs_info, leaf, slot,
+ "invalid block group used, have %llu expect [0, %llu)",
+ btrfs_block_group_used(&bgi), key->offset);
+ return -EUCLEAN;
+ }
+
+ flags = btrfs_block_group_flags(&bgi);
+ if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
+ block_group_err(fs_info, leaf, slot,
+"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
+ flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
+ hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
+ return -EUCLEAN;
+ }
+
+ type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ if (type != BTRFS_BLOCK_GROUP_DATA &&
+ type != BTRFS_BLOCK_GROUP_METADATA &&
+ type != BTRFS_BLOCK_GROUP_SYSTEM &&
+ type != (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_DATA)) {
+ block_group_err(fs_info, leaf, slot,
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
+ type, hweight64(type),
+ BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
+ return -EUCLEAN;
+ }
+ return 0;
+}
+
+/*
+ * Common point to switch the item-specific validation.
+ */
+static int check_leaf_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ int ret = 0;
+
+ switch (key->type) {
+ case BTRFS_EXTENT_DATA_KEY:
+ ret = check_extent_data_item(root, leaf, key, slot);
+ break;
+ case BTRFS_EXTENT_CSUM_KEY:
+ ret = check_csum_item(root, leaf, key, slot);
+ break;
+ case BTRFS_DIR_ITEM_KEY:
+ case BTRFS_DIR_INDEX_KEY:
+ case BTRFS_XATTR_ITEM_KEY:
+ ret = check_dir_item(root, leaf, key, slot);
+ break;
+ case BTRFS_BLOCK_GROUP_ITEM_KEY:
+ ret = check_block_group_item(root->fs_info, leaf, key, slot);
+ break;
+ }
+ return ret;
+}
+
+static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
+ bool check_item_data)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ /* No valid key type is 0, so all key should be larger than this key */
+ struct btrfs_key prev_key = {0, 0, 0};
+ struct btrfs_key key;
+ u32 nritems = btrfs_header_nritems(leaf);
+ int slot;
+
+ if (btrfs_header_level(leaf) != 0) {
+ generic_err(root, leaf, 0,
+ "invalid level for leaf, have %d expect 0",
+ btrfs_header_level(leaf));
+ return -EUCLEAN;
+ }
+
+ /*
+ * Extent buffers from a relocation tree have a owner field that
+ * corresponds to the subvolume tree they are based on. So just from an
+ * extent buffer alone we can not find out what is the id of the
+ * corresponding subvolume tree, so we can not figure out if the extent
+ * buffer corresponds to the root of the relocation tree or not. So
+ * skip this check for relocation trees.
+ */
+ if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
+ u64 owner = btrfs_header_owner(leaf);
+ struct btrfs_root *check_root;
+
+ /* These trees must never be empty */
+ if (owner == BTRFS_ROOT_TREE_OBJECTID ||
+ owner == BTRFS_CHUNK_TREE_OBJECTID ||
+ owner == BTRFS_EXTENT_TREE_OBJECTID ||
+ owner == BTRFS_DEV_TREE_OBJECTID ||
+ owner == BTRFS_FS_TREE_OBJECTID ||
+ owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ generic_err(root, leaf, 0,
+ "invalid root, root %llu must never be empty",
+ owner);
+ return -EUCLEAN;
+ }
+ key.objectid = owner;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ check_root = btrfs_get_fs_root(fs_info, &key, false);
+ /*
+ * The only reason we also check NULL here is that during
+ * open_ctree() some roots has not yet been set up.
+ */
+ if (!IS_ERR_OR_NULL(check_root)) {
+ struct extent_buffer *eb;
+
+ eb = btrfs_root_node(check_root);
+ /* if leaf is the root, then it's fine */
+ if (leaf != eb) {
+ CORRUPT("non-root leaf's nritems is 0",
+ leaf, check_root, 0);
+ free_extent_buffer(eb);
+ return -EUCLEAN;
+ }
+ free_extent_buffer(eb);
+ }
+ return 0;
+ }
+
+ if (nritems == 0)
+ return 0;
+
+ /*
+ * Check the following things to make sure this is a good leaf, and
+ * leaf users won't need to bother with similar sanity checks:
+ *
+ * 1) key ordering
+ * 2) item offset and size
+ * No overlap, no hole, all inside the leaf.
+ * 3) item content
+ * If possible, do comprehensive sanity check.
+ * NOTE: All checks must only rely on the item data itself.
+ */
+ for (slot = 0; slot < nritems; slot++) {
+ u32 item_end_expected;
+ int ret;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ /* Make sure the keys are in the right order */
+ if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
+ CORRUPT("bad key order", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Make sure the offset and ends are right, remember that the
+ * item data starts at the end of the leaf and grows towards the
+ * front.
+ */
+ if (slot == 0)
+ item_end_expected = BTRFS_LEAF_DATA_SIZE(root);
+ else
+ item_end_expected = btrfs_item_offset_nr(leaf,
+ slot - 1);
+ if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
+ CORRUPT("slot offset bad", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Check to make sure that we don't point outside of the leaf,
+ * just in case all the items are consistent to each other, but
+ * all point outside of the leaf.
+ */
+ if (btrfs_item_end_nr(leaf, slot) >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ CORRUPT("slot end outside of leaf", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ /* Also check if the item pointer overlaps with btrfs item. */
+ if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
+ btrfs_item_ptr_offset(leaf, slot)) {
+ CORRUPT("slot overlap with its data", leaf, root, slot);
+ return -EUCLEAN;
+ }
+
+ if (check_item_data) {
+ /*
+ * Check if the item size and content meet other
+ * criteria
+ */
+ ret = check_leaf_item(root, leaf, &key, slot);
+ if (ret < 0)
+ return ret;
+ }
+
+ prev_key.objectid = key.objectid;
+ prev_key.type = key.type;
+ prev_key.offset = key.offset;
+ }
+
+ return 0;
+}
+
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, true);
+}
+
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ return check_leaf(root, leaf, false);
+}
+
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+ unsigned long nr = btrfs_header_nritems(node);
+ struct btrfs_key key, next_key;
+ int slot;
+ int level = btrfs_header_level(node);
+ u64 bytenr;
+ int ret = 0;
+
+ if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
+ generic_err(root, node, 0,
+ "invalid level for node, have %d expect [1, %d]",
+ level, BTRFS_MAX_LEVEL - 1);
+ return -EUCLEAN;
+ }
+ if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+ btrfs_crit(root->fs_info,
+"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
+ root->objectid, node->start,
+ nr == 0 ? "small" : "large", nr,
+ BTRFS_NODEPTRS_PER_BLOCK(root));
+ return -EUCLEAN;
+ }
+
+ for (slot = 0; slot < nr - 1; slot++) {
+ bytenr = btrfs_node_blockptr(node, slot);
+ btrfs_node_key_to_cpu(node, &key, slot);
+ btrfs_node_key_to_cpu(node, &next_key, slot + 1);
+
+ if (!bytenr) {
+ generic_err(root, node, slot,
+ "invalid NULL node pointer");
+ ret = -EUCLEAN;
+ goto out;
+ }
+ if (!IS_ALIGNED(bytenr, root->sectorsize)) {
+ generic_err(root, node, slot,
+ "unaligned pointer, have %llu should be aligned to %u",
+ bytenr, root->sectorsize);
+ ret = -EUCLEAN;
+ goto out;
+ }
+
+ if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
+ generic_err(root, node, slot,
+ "bad key order, current (%llu %u %llu) next (%llu %u %llu)",
+ key.objectid, key.type, key.offset,
+ next_key.objectid, next_key.type,
+ next_key.offset);
+ ret = -EUCLEAN;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
new file mode 100644
index 000000000000..3d53e8d6fda0
--- /dev/null
+++ b/fs/btrfs/tree-checker.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) Qu Wenruo 2017. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program.
+ */
+
+#ifndef __BTRFS_TREE_CHECKER__
+#define __BTRFS_TREE_CHECKER__
+
+#include "ctree.h"
+#include "extent_io.h"
+
+/*
+ * Comprehensive leaf checker.
+ * Will check not only the item pointers, but also every possible member
+ * in item data.
+ */
+int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
+
+/*
+ * Less strict leaf checker.
+ * Will only check item pointers, not reading item data.
+ */
+int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+ struct extent_buffer *leaf);
+int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
+
+#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 76017e1b3c0f..5aa2749eaf42 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4656,7 +4656,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = SZ_1G;
- max_chunk_size = 10 * max_stripe_size;
+ max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info->chunk_root);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
@@ -6370,6 +6370,8 @@ static int btrfs_check_chunk_valid(struct btrfs_root *root,
u16 num_stripes;
u16 sub_stripes;
u64 type;
+ u64 features;
+ bool mixed = false;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
@@ -6410,6 +6412,32 @@ static int btrfs_check_chunk_valid(struct btrfs_root *root,
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
+
+ if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
+ btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type);
+ return -EIO;
+ }
+
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
+ (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(root->fs_info,
+ "system chunk with data or metadata type: 0x%llx", type);
+ return -EIO;
+ }
+
+ features = btrfs_super_incompat_flags(root->fs_info->super_copy);
+ if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ mixed = true;
+
+ if (!mixed) {
+ if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
+ (type & BTRFS_BLOCK_GROUP_DATA)) {
+ btrfs_err(root->fs_info,
+ "mixed chunk type in non-mixed mode: 0x%llx", type);
+ return -EIO;
+ }
+ }
+
if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
(type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
(type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 09ed29c67848..9c09aa29d6bd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -24,6 +24,8 @@
#include <linux/btrfs.h>
#include "async-thread.h"
+#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3d2639c30018..6cbd0d805c9d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3983,14 +3983,24 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
+static int add_authorizer_challenge(struct ceph_connection *con,
+ void *challenge_buf, int challenge_buf_len)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_mds_client *mdsc = s->s_mdsc;
+ struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+
+ return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
+ challenge_buf, challenge_buf_len);
+}
-static int verify_authorizer_reply(struct ceph_connection *con, int len)
+static int verify_authorizer_reply(struct ceph_connection *con)
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
- return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer, len);
+ return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
}
static int invalidate_authorizer(struct ceph_connection *con)
@@ -4046,6 +4056,7 @@ static const struct ceph_connection_operations mds_con_ops = {
.put = con_put,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
+ .add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index aee2a066a446..0b061bbf1639 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -69,6 +69,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
+ .is_meta = is_meta,
};
if (unlikely(!is_meta))
@@ -85,8 +86,10 @@ repeat:
fio.page = page;
if (f2fs_submit_page_bio(&fio)) {
- f2fs_put_page(page, 1);
- goto repeat;
+ memset(page_address(page), 0, PAGE_SIZE);
+ f2fs_stop_checkpoint(sbi, false);
+ f2fs_bug_on(sbi, 1);
+ return page;
}
lock_page(page);
@@ -117,7 +120,8 @@ struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, false);
}
-bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
@@ -137,8 +141,20 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
return false;
break;
case META_POR:
+ case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi)))
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ if (type == DATA_GENERIC) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ WARN_ON(1);
+ }
+ return false;
+ }
+ break;
+ case META_GENERIC:
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+ blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
@@ -162,6 +178,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.op = REQ_OP_READ,
.op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
+ .is_meta = (type != META_POR),
};
struct blk_plug plug;
@@ -171,7 +188,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
- if (!is_valid_blkaddr(sbi, blkno, type))
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -706,6 +723,14 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
&cp_page_1, version);
if (err)
return NULL;
+
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+ sbi->blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
@@ -769,15 +794,15 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
- /* Sanity checking of checkpoint */
- if (sanity_check_ckpt(sbi))
- goto fail_no_cp;
-
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
+ /* Sanity checking of checkpoint */
+ if (sanity_check_ckpt(sbi))
+ goto free_fail_no_cp;
+
if (cp_blks <= 1)
goto done;
@@ -799,6 +824,9 @@ done:
f2fs_put_page(cp2, 1);
return 0;
+free_fail_no_cp:
+ f2fs_put_page(cp1, 1);
+ f2fs_put_page(cp2, 1);
fail_no_cp:
kfree(sbi->ckpt);
return -EINVAL;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ae354ac67da1..9041805096e0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -240,6 +240,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFAULT;
+
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -266,9 +270,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
- if (fio->old_blkaddr != NEW_ADDR)
- verify_block_addr(sbi, fio->old_blkaddr);
- verify_block_addr(sbi, fio->new_blkaddr);
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
+ verify_block_addr(fio, fio->old_blkaddr);
+ verify_block_addr(fio, fio->new_blkaddr);
down_write(&io->io_rwsem);
@@ -722,7 +726,13 @@ next_dnode:
next_block:
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ err = -EFAULT;
+ goto sync_out;
+ }
+
+ if (!is_valid_data_blkaddr(sbi, blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -985,6 +995,9 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio;
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ return ERR_PTR(-EFAULT);
+
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
ctx = fscrypt_get_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
@@ -1084,6 +1097,10 @@ got_it:
SetPageUptodate(page);
goto confused;
}
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC))
+ goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
@@ -1212,11 +1229,17 @@ retry_encrypt:
set_page_writeback(page);
+ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC)) {
+ err = -EFAULT;
+ goto out_writepage;
+ }
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (unlikely(fio->old_blkaddr != NEW_ADDR &&
+ if (unlikely(is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 88e111ab068b..9c380885b0fc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -145,7 +145,7 @@ struct cp_control {
};
/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
*/
enum {
META_CP,
@@ -153,6 +153,8 @@ enum {
META_SIT,
META_SSA,
META_POR,
+ DATA_GENERIC,
+ META_GENERIC,
};
/* for the list of ino */
@@ -694,6 +696,7 @@ struct f2fs_io_info {
block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
+ bool is_meta; /* indicate borrow meta inode mapping or not */
};
#define is_read_io(rw) (rw == READ)
@@ -1929,6 +1932,39 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
(pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \
ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
+ (!is_read_io(fio->op) || fio->is_meta))
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
+ f2fs_bug_on(sbi, 1);
+ }
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
+{
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ return false;
+ return true;
+}
+
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (!__is_valid_data_blkaddr(blkaddr))
+ return false;
+ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
+ return true;
+}
+
/*
* file.c
*/
@@ -2114,7 +2150,8 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool);
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
-bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 594e6e20d6dd..b768f495603e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -310,13 +310,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
return pgofs;
}
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
- int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+ pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -378,7 +378,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
block_t blkaddr;
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC)) {
+ f2fs_put_dnode(&dn);
+ goto fail;
+ }
+
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -481,6 +489,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR;
set_data_blkaddr(dn);
+
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ continue;
+
invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index d7369895a78a..1de02c31756b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -59,13 +59,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
-static bool __written_first_block(struct f2fs_inode *ri)
+static int __written_first_block(struct f2fs_sb_info *sbi,
+ struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[0]);
- if (addr != NEW_ADDR && addr != NULL_ADDR)
- return true;
- return false;
+ if (!__is_valid_data_blkaddr(addr))
+ return 1;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ return -EFAULT;
+ return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -103,12 +106,57 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
return;
}
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned long long iblocks;
+
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ if (!iblocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
+ return false;
+ }
+
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
+ "[%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
+ return false;
+ }
+
+ if (F2FS_I(inode)->extent_tree) {
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
+ "is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ }
+ return true;
+}
+
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_inode *ri;
+ int err;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
@@ -152,6 +200,11 @@ static int do_read_inode(struct inode *inode)
get_inline_info(inode, ri);
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ return -EINVAL;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -159,7 +212,12 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
- if (__written_first_block(ri))
+ err = __written_first_block(sbi, ri);
+ if (err < 0) {
+ f2fs_put_page(node_page, 1);
+ return err;
+ }
+ if (!err)
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
if (!need_inode_block_update(sbi, inode->i_ino))
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index addff6a3b176..f4fe54047fb7 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -304,8 +304,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
- nat_get_blkaddr(e) != NULL_ADDR &&
+ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -320,7 +319,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -1606,6 +1605,12 @@ static int f2fs_write_node_page(struct page *page,
return 0;
}
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+ up_read(&sbi->node_write);
+ goto redirty_out;
+ }
+
set_page_writeback(page);
fio.old_blkaddr = ni.blk_addr;
write_node_page(nid, &fio);
@@ -1704,8 +1709,9 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct free_nid *i;
+ struct free_nid *i, *e;
struct nat_entry *ne;
+ int err = -EINVAL;
if (!available_free_memory(sbi, FREE_NIDS))
return -1;
@@ -1714,35 +1720,58 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (unlikely(nid == 0))
return 0;
- if (build) {
- /* do not add allocated nids */
- ne = __lookup_nat_cache(nm_i, nid);
- if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
- nat_get_blkaddr(ne) != NULL_ADDR))
- return 0;
- }
-
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = NID_NEW;
- if (radix_tree_preload(GFP_NOFS)) {
- kmem_cache_free(free_nid_slab, i);
- return 0;
- }
+ if (radix_tree_preload(GFP_NOFS))
+ goto err;
spin_lock(&nm_i->free_nid_list_lock);
- if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
- spin_unlock(&nm_i->free_nid_list_lock);
- radix_tree_preload_end();
- kmem_cache_free(free_nid_slab, i);
- return 0;
+
+ if (build) {
+ /*
+ * Thread A Thread B
+ * - f2fs_create
+ * - f2fs_new_inode
+ * - alloc_nid
+ * - __insert_nid_to_list(ALLOC_NID_LIST)
+ * - f2fs_balance_fs_bg
+ * - build_free_nids
+ * - __build_free_nids
+ * - scan_nat_page
+ * - add_free_nid
+ * - __lookup_nat_cache
+ * - f2fs_add_link
+ * - init_inode_metadata
+ * - new_inode_page
+ * - new_node_page
+ * - set_node_addr
+ * - alloc_nid_done
+ * - __remove_nid_from_list(ALLOC_NID_LIST)
+ * - __insert_nid_to_list(FREE_NID_LIST)
+ */
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+ nat_get_blkaddr(ne) != NULL_ADDR))
+ goto err_out;
+
+ e = __lookup_free_nid_list(nm_i, nid);
+ if (e)
+ goto err_out;
}
+ if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i))
+ goto err_out;
+ err = 0;
list_add_tail(&i->list, &nm_i->free_nid_list);
nm_i->fcnt++;
+err_out:
spin_unlock(&nm_i->free_nid_list_lock);
radix_tree_preload_end();
- return 1;
+err:
+ if (err)
+ kmem_cache_free(free_nid_slab, i);
+ return !err;
}
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 98c1a63a4614..ab4cbb4be423 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -236,7 +236,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = get_tmp_page(sbi, blkaddr);
@@ -468,7 +468,7 @@ retry_dn:
}
/* dest is valid block, try to recover from src to dest */
- if (is_valid_blkaddr(sbi, dest, META_POR)) {
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = reserve_new_block(&dn);
@@ -527,7 +527,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
- if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
ra_meta_pages_cond(sbi, blkaddr);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 35d48ef0573c..2fb99a081de8 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -493,6 +493,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->cmd_control_info = fcc;
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return err;
+
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
@@ -941,7 +944,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
struct seg_entry *se;
bool is_cp = false;
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
mutex_lock(&sit_i->sentry_lock);
@@ -1665,7 +1668,7 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
{
struct page *cpage;
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
@@ -2319,7 +2322,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
return restore_curseg_summaries(sbi);
}
-static void build_sit_entries(struct f2fs_sb_info *sbi)
+static int build_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
@@ -2330,6 +2333,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
+ int err = 0;
do {
readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
@@ -2347,7 +2351,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
- check_block_count(sbi, start, &sit);
+ err = check_block_count(sbi, start, &sit);
+ if (err)
+ return err;
seg_info_from_raw_sit(se, &sit);
/* build discard map only one time */
@@ -2370,12 +2376,23 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
unsigned int old_valid_blocks;
start = le32_to_cpu(segno_in_journal(journal, i));
+ if (start >= MAIN_SEGS(sbi)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong journal entry on segno %u",
+ start);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ break;
+ }
+
se = &sit_i->sentries[start];
sit = sit_in_journal(journal, i);
old_valid_blocks = se->valid_blocks;
- check_block_count(sbi, start, &sit);
+ err = check_block_count(sbi, start, &sit);
+ if (err)
+ break;
seg_info_from_raw_sit(se, &sit);
if (f2fs_discard_en(sbi)) {
@@ -2390,6 +2407,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
se->valid_blocks - old_valid_blocks;
}
up_read(&curseg->journal_rwsem);
+ return err;
}
static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -2539,7 +2557,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sm_info->sit_entry_set);
- if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ if (!f2fs_readonly(sbi->sb)) {
err = create_flush_cmd_control(sbi);
if (err)
return err;
@@ -2556,7 +2574,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
return err;
/* reinit free segmap based on SIT */
- build_sit_entries(sbi);
+ err = build_sit_entries(sbi);
+ if (err)
+ return err;
init_free_segmap(sbi);
err = build_dirty_segmap(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 3d9b9e98c4c2..893723978f5e 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -18,6 +18,8 @@
#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
+#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
@@ -47,13 +49,19 @@
(secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
sbi->segs_per_sec)) \
-#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
-#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
+#define MAIN_BLKADDR(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr))
+#define SEG0_BLKADDR(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr))
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi) (sbi->total_sections)
-#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
+#define TOTAL_SEGS(sbi) \
+ (SM_I(sbi) ? SM_I(sbi)->segment_count : \
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
@@ -73,7 +81,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
+ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define GET_SECNO(sbi, segno) \
@@ -589,16 +597,20 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
-static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
+static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi)
- || blk_addr >= MAX_BLKADDR(sbi));
+ struct f2fs_sb_info *sbi = fio->sbi;
+
+ if (__is_meta_io(fio))
+ verify_blkaddr(sbi, blk_addr, META_GENERIC);
+ else
+ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
* Summary block is always treated as an invalid block
*/
-static inline void check_block_count(struct f2fs_sb_info *sbi,
+static inline int check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
#ifdef CONFIG_F2FS_CHECK_FS
@@ -620,11 +632,25 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
cur_pos = next_pos;
is_valid = !is_valid;
} while (cur_pos < sbi->blocks_per_seg);
- BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
+
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Mismatch valid blocks %d vs. %d",
+ GET_SIT_VBLOCKS(raw_sit), valid_blocks);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return -EINVAL;
+ }
#endif
/* check segment usage, and check boundary of a given segment number */
- f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
- || segno > TOTAL_SEGS(sbi) - 1);
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+ || segno > TOTAL_SEGS(sbi) - 1)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong valid blocks %d or segno %u",
+ GET_SIT_VBLOCKS(raw_sit), segno);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return -EINVAL;
+ }
+ return 0;
}
static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 91bf72334722..c8f408d8a582 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1337,6 +1337,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
struct buffer_head *bh)
{
+ block_t segment_count, segs_per_sec, secs_per_zone;
+ block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
struct super_block *sb = sbi->sb;
@@ -1393,6 +1395,68 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
+ segment_count = le32_to_cpu(raw_super->segment_count);
+ segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+ secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+ total_sections = le32_to_cpu(raw_super->section_count);
+
+ /* blocks_per_seg should be 512, given the above check */
+ blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (segment_count > F2FS_MAX_SEGMENT ||
+ segment_count < F2FS_MIN_SEGMENTS) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid segment count (%u)",
+ segment_count);
+ return 1;
+ }
+
+ if (total_sections > segment_count ||
+ total_sections < F2FS_MIN_SEGMENTS ||
+ segs_per_sec > segment_count || !segs_per_sec) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid segment/section count (%u, %u x %u)",
+ segment_count, total_sections, segs_per_sec);
+ return 1;
+ }
+
+ if ((segment_count / segs_per_sec) < total_sections) {
+ f2fs_msg(sb, KERN_INFO,
+ "Small segment_count (%u < %u * %u)",
+ segment_count, segs_per_sec, total_sections);
+ return 1;
+ }
+
+ if (segment_count > (le32_to_cpu(raw_super->block_count) >> 9)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong segment_count / block_count (%u > %u)",
+ segment_count, le32_to_cpu(raw_super->block_count));
+ return 1;
+ }
+
+ if (secs_per_zone > total_sections || !secs_per_zone) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong secs_per_zone / total_sections (%u, %u)",
+ secs_per_zone, total_sections);
+ return 1;
+ }
+ if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION) {
+ f2fs_msg(sb, KERN_INFO,
+ "Corrupted extension count (%u > %u)",
+ le32_to_cpu(raw_super->extension_count),
+ F2FS_MAX_EXTENSION);
+ return 1;
+ }
+
+ if (le32_to_cpu(raw_super->cp_payload) >
+ (blocks_per_seg - F2FS_CP_PACKS)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Insane cp_payload (%u > %u)",
+ le32_to_cpu(raw_super->cp_payload),
+ blocks_per_seg - F2FS_CP_PACKS);
+ return 1;
+ }
+
/* check reserved ino info */
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
@@ -1405,13 +1469,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
- if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment count (%u)",
- le32_to_cpu(raw_super->segment_count));
- return 1;
- }
-
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sbi, bh))
return 1;
@@ -1424,10 +1481,14 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
unsigned int sit_segs, nat_segs;
unsigned int sit_bitmap_size, nat_bitmap_size;
unsigned int log_blocks_per_seg;
+ unsigned int segment_count_main;
+ unsigned int cp_pack_start_sum, cp_payload;
+ block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
@@ -1442,6 +1503,26 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta >= total))
return 1;
+ ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
+ reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
+
+ if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+ ovp_segments == 0 || reserved_segments == 0)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong layout: check mkfs.f2fs version");
+ return 1;
+ }
+
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ if (!user_block_count || user_block_count >=
+ segment_count_main << log_blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong user_block_count: %u", user_block_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -1458,7 +1539,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
- log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
@@ -1468,6 +1548,17 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ cp_pack_start_sum = __start_sum_addr(sbi);
+ cp_payload = __cp_payload(sbi);
+ if (cp_pack_start_sum < cp_payload + 1 ||
+ cp_pack_start_sum > blocks_per_seg - 1 -
+ NR_CURSEG_TYPE) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2c2f182cde03..f53c139c312e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -118,6 +118,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+ (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
@@ -136,17 +146,31 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
+ /*
+ * page based offset in vm_pgoff could be sufficiently large to
+ * overflow a loff_t when converted to byte offset. This can
+ * only happen on architectures where sizeof(loff_t) ==
+ * sizeof(unsigned long). So, only check in those instances.
+ */
+ if (sizeof(unsigned long) == sizeof(loff_t)) {
+ if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
+ return -EINVAL;
+ }
+
+ /* must be huge page aligned */
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
return -EINVAL;
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+ len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ /* check for overflow */
+ if (len < vma_len)
+ return -EINVAL;
inode_lock(inode);
file_accessed(file);
ret = -ENOMEM;
- len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-
if (hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
@@ -155,7 +179,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
ret = 0;
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
- inode->i_size = len;
+ i_size_write(inode, len);
out:
inode_unlock(inode);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 9b43ca02b7ab..80317b04c84a 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -88,7 +88,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
int slen = strlen(kn->name);
len -= slen;
- strncpy(s + len, kn->name, slen);
+ memcpy(s + len, kn->name, slen);
if (len)
s[--len] = '/';
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 12467ad608cd..03369a89600e 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -929,16 +929,20 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
}
ret = udf_dstrCS0toUTF8(outstr, 31, pvoldesc->volIdent, 32);
- if (ret < 0)
- goto out_bh;
-
- strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+ if (ret < 0) {
+ strcpy(UDF_SB(sb)->s_volume_ident, "InvalidName");
+ pr_warn("incorrect volume identification, setting to "
+ "'InvalidName'\n");
+ } else {
+ strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+ }
udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);
ret = udf_dstrCS0toUTF8(outstr, 127, pvoldesc->volSetIdent, 128);
- if (ret < 0)
+ if (ret < 0) {
+ ret = 0;
goto out_bh;
-
+ }
outstr[ret] = 0;
udf_debug("volSetIdent[] = '%s'\n", outstr);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 3a3be23689b3..61a1738895b7 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -341,6 +341,11 @@ try_again:
return u_len;
}
+/*
+ * Convert CS0 dstring to output charset. Warning: This function may truncate
+ * input string if it is too long as it is used for informational strings only
+ * and it is better to truncate the string than to refuse mounting a media.
+ */
int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len,
const uint8_t *ocu_i, int i_len)
{
@@ -349,9 +354,12 @@ int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len,
if (i_len > 0) {
s_len = ocu_i[i_len - 1];
if (s_len >= i_len) {
- pr_err("incorrect dstring lengths (%d/%d)\n",
- s_len, i_len);
- return -EINVAL;
+ pr_warn("incorrect dstring lengths (%d/%d),"
+ " truncating\n", s_len, i_len);
+ s_len = i_len - 1;
+ /* 2-byte encoding? Need to round properly... */
+ if (ocu_i[0] == 16)
+ s_len -= (s_len - 1) & 2;
}
}
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6622d46ddec3..9687208c676f 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -487,7 +487,14 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
if (args->flags & ATTR_CREATE)
return retval;
retval = xfs_attr_shortform_remove(args);
- ASSERT(retval == 0);
+ if (retval)
+ return retval;
+ /*
+ * Since we have removed the old attr, clear ATTR_REPLACE so
+ * that the leaf format add routine won't trip over the attr
+ * not being around.
+ */
+ args->flags &= ~ATTR_REPLACE;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||