aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c4
-rw-r--r--fs/9p/cache.c3
-rw-r--r--fs/9p/v9fs.c9
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_addr.c7
-rw-r--r--fs/9p/vfs_file.c142
-rw-r--r--fs/9p/vfs_inode.c26
-rw-r--r--fs/9p/vfs_inode_dotl.c17
-rw-r--r--fs/9p/vfs_super.c8
-rw-r--r--fs/9p/xattr.c10
-rw-r--r--fs/Kconfig6
-rw-r--r--fs/Makefile5
-rw-r--r--fs/affs/super.c57
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/proc.c122
-rw-r--r--fs/aio.c113
-rw-r--r--fs/anon_inodes.c34
-rw-r--r--fs/attr.c5
-rw-r--r--fs/autofs4/autofs_i.h4
-rw-r--r--fs/autofs4/dev-ioctl.c16
-rw-r--r--fs/autofs4/expire.c14
-rw-r--r--fs/autofs4/inode.c49
-rw-r--r--fs/autofs4/root.c6
-rw-r--r--fs/autofs4/symlink.c4
-rw-r--r--fs/autofs4/waitq.c16
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--fs/bio-integrity.c184
-rw-r--r--fs/bio.c507
-rw-r--r--fs/btrfs/Kconfig3
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/acl.c142
-rw-r--r--fs/btrfs/backref.c195
-rw-r--r--fs/btrfs/btrfs_inode.h4
-rw-r--r--fs/btrfs/check-integrity.c24
-rw-r--r--fs/btrfs/compression.c41
-rw-r--r--fs/btrfs/ctree.c552
-rw-r--r--fs/btrfs/ctree.h141
-rw-r--r--fs/btrfs/delayed-inode.c208
-rw-r--r--fs/btrfs/delayed-inode.h8
-rw-r--r--fs/btrfs/delayed-ref.c300
-rw-r--r--fs/btrfs/delayed-ref.h26
-rw-r--r--fs/btrfs/dev-replace.c56
-rw-r--r--fs/btrfs/dir-item.c8
-rw-r--r--fs/btrfs/disk-io.c269
-rw-r--r--fs/btrfs/extent-tree.c618
-rw-r--r--fs/btrfs/extent_io.c277
-rw-r--r--fs/btrfs/extent_io.h9
-rw-r--r--fs/btrfs/extent_map.c74
-rw-r--r--fs/btrfs/file-item.c23
-rw-r--r--fs/btrfs/file.c216
-rw-r--r--fs/btrfs/free-space-cache.c23
-rw-r--r--fs/btrfs/hash.c50
-rw-r--r--fs/btrfs/hash.h11
-rw-r--r--fs/btrfs/inode-item.c65
-rw-r--r--fs/btrfs/inode.c500
-rw-r--r--fs/btrfs/ioctl.c404
-rw-r--r--fs/btrfs/lzo.c6
-rw-r--r--fs/btrfs/ordered-data.c15
-rw-r--r--fs/btrfs/orphan.c20
-rw-r--r--fs/btrfs/print-tree.c4
-rw-r--r--fs/btrfs/props.c427
-rw-r--r--fs/btrfs/props.h42
-rw-r--r--fs/btrfs/qgroup.c57
-rw-r--r--fs/btrfs/raid56.c22
-rw-r--r--fs/btrfs/reada.c9
-rw-r--r--fs/btrfs/relocation.c105
-rw-r--r--fs/btrfs/root-tree.c19
-rw-r--r--fs/btrfs/scrub.c146
-rw-r--r--fs/btrfs/send.c973
-rw-r--r--fs/btrfs/super.c254
-rw-r--r--fs/btrfs/sysfs.c623
-rw-r--r--fs/btrfs/sysfs.h64
-rw-r--r--fs/btrfs/tests/btrfs-tests.h2
-rw-r--r--fs/btrfs/tests/free-space-tests.c4
-rw-r--r--fs/btrfs/transaction.c55
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-log.c209
-rw-r--r--fs/btrfs/ulist.c117
-rw-r--r--fs/btrfs/ulist.h39
-rw-r--r--fs/btrfs/uuid-tree.c13
-rw-r--r--fs/btrfs/volumes.c108
-rw-r--r--fs/btrfs/xattr.c17
-rw-r--r--fs/btrfs/xattr.h2
-rw-r--r--fs/btrfs/zlib.c8
-rw-r--r--fs/buffer.c20
-rw-r--r--fs/ceph/Kconfig13
-rw-r--r--fs/ceph/Makefile1
-rw-r--r--fs/ceph/acl.c200
-rw-r--r--fs/ceph/addr.c101
-rw-r--r--fs/ceph/cache.h13
-rw-r--r--fs/ceph/caps.c338
-rw-r--r--fs/ceph/dir.c36
-rw-r--r--fs/ceph/file.c438
-rw-r--r--fs/ceph/inode.c172
-rw-r--r--fs/ceph/ioctl.c8
-rw-r--r--fs/ceph/mds_client.c132
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/strings.c2
-rw-r--r--fs/ceph/super.c35
-rw-r--r--fs/ceph/super.h49
-rw-r--r--fs/ceph/xattr.c115
-rw-r--r--fs/cifs/cifsacl.c101
-rw-r--r--fs/cifs/cifsglob.h27
-rw-r--r--fs/cifs/cifsproto.h36
-rw-r--r--fs/cifs/cifssmb.c179
-rw-r--r--fs/cifs/dir.c72
-rw-r--r--fs/cifs/file.c96
-rw-r--r--fs/cifs/inode.c191
-rw-r--r--fs/cifs/link.c339
-rw-r--r--fs/cifs/readdir.c2
-rw-r--r--fs/cifs/smb1ops.c135
-rw-r--r--fs/cifs/smb2glob.h3
-rw-r--r--fs/cifs/smb2ops.c14
-rw-r--r--fs/cifs/smb2pdu.c9
-rw-r--r--fs/cifs/smb2proto.h3
-rw-r--r--fs/cifs/transport.c29
-rw-r--r--fs/cifs/xattr.c64
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/coredump.c1
-rw-r--r--fs/coredump.h6
-rw-r--r--fs/cramfs/inode.c50
-rw-r--r--fs/cramfs/internal.h4
-rw-r--r--fs/cramfs/uncompress.c2
-rw-r--r--fs/dcache.c21
-rw-r--r--fs/dcookies.c2
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/dlm/lowcomms.c12
-rw-r--r--fs/ecryptfs/inode.c29
-rw-r--r--fs/efs/super.c39
-rw-r--r--fs/eventfd.c13
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c165
-rw-r--r--fs/exofs/inode.c31
-rw-r--r--fs/exofs/ore.c45
-rw-r--r--fs/ext2/acl.c188
-rw-r--r--fs/ext2/acl.h8
-rw-r--r--fs/ext2/file.c1
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext2/xattr.c8
-rw-r--r--fs/ext2/xattr.h2
-rw-r--r--fs/ext3/acl.c223
-rw-r--r--fs/ext3/acl.h9
-rw-r--r--fs/ext3/dir.c44
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/namei.c2
-rw-r--r--fs/ext3/xattr.c8
-rw-r--r--fs/ext3/xattr.h2
-rw-r--r--fs/ext4/acl.c223
-rw-r--r--fs/ext4/acl.h9
-rw-r--r--fs/ext4/block_validity.c33
-rw-r--r--fs/ext4/dir.c35
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/ext4_jbd2.c9
-rw-r--r--fs/ext4/extents.c48
-rw-r--r--fs/ext4/file.c3
-rw-r--r--fs/ext4/inline.c26
-rw-r--r--fs/ext4/inode.c44
-rw-r--r--fs/ext4/ioctl.c9
-rw-r--r--fs/ext4/mballoc.c17
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/page-io.c8
-rw-r--r--fs/ext4/resize.c34
-rw-r--r--fs/ext4/super.c41
-rw-r--r--fs/ext4/xattr.c8
-rw-r--r--fs/ext4/xattr.h2
-rw-r--r--fs/f2fs/Makefile2
-rw-r--r--fs/f2fs/acl.c174
-rw-r--r--fs/f2fs/acl.h7
-rw-r--r--fs/f2fs/checkpoint.c195
-rw-r--r--fs/f2fs/data.c614
-rw-r--r--fs/f2fs/debug.c53
-rw-r--r--fs/f2fs/dir.c47
-rw-r--r--fs/f2fs/f2fs.h199
-rw-r--r--fs/f2fs/file.c87
-rw-r--r--fs/f2fs/gc.c22
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/inline.c222
-rw-r--r--fs/f2fs/inode.c23
-rw-r--r--fs/f2fs/namei.c7
-rw-r--r--fs/f2fs/node.c272
-rw-r--r--fs/f2fs/node.h8
-rw-r--r--fs/f2fs/recovery.c49
-rw-r--r--fs/f2fs/segment.c584
-rw-r--r--fs/f2fs/segment.h81
-rw-r--r--fs/f2fs/super.c72
-rw-r--r--fs/f2fs/xattr.c11
-rw-r--r--fs/f2fs/xattr.h2
-rw-r--r--fs/file.c123
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/fs-writeback.c48
-rw-r--r--fs/fscache/object-list.c5
-rw-r--r--fs/fscache/object.c3
-rw-r--r--fs/fuse/dev.c25
-rw-r--r--fs/fuse/dir.c14
-rw-r--r--fs/fuse/file.c44
-rw-r--r--fs/fuse/fuse_i.h5
-rw-r--r--fs/generic_acl.c184
-rw-r--r--fs/gfs2/acl.c234
-rw-r--r--fs/gfs2/acl.h4
-rw-r--r--fs/gfs2/aops.c49
-rw-r--r--fs/gfs2/dir.c90
-rw-r--r--fs/gfs2/dir.h19
-rw-r--r--fs/gfs2/glock.c31
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/glops.c36
-rw-r--r--fs/gfs2/incore.h23
-rw-r--r--fs/gfs2/inode.c152
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/lops.c7
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c8
-rw-r--r--fs/gfs2/ops_fstype.c72
-rw-r--r--fs/gfs2/quota.c342
-rw-r--r--fs/gfs2/quota.h1
-rw-r--r--fs/gfs2/rgrp.c113
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/gfs2/super.c43
-rw-r--r--fs/gfs2/xattr.c4
-rw-r--r--fs/hfsplus/acl.h9
-rw-r--r--fs/hfsplus/catalog.c41
-rw-r--r--fs/hfsplus/dir.c3
-rw-r--r--fs/hfsplus/hfsplus_fs.h1
-rw-r--r--fs/hfsplus/hfsplus_raw.h6
-rw-r--r--fs/hfsplus/inode.c73
-rw-r--r--fs/hfsplus/options.c2
-rw-r--r--fs/hfsplus/posix_acl.c168
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hfsplus/xattr.c150
-rw-r--r--fs/hfsplus/xattr.h4
-rw-r--r--fs/hostfs/hostfs_kern.c53
-rw-r--r--fs/hpfs/alloc.c66
-rw-r--r--fs/hpfs/buffer.c96
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/super.c29
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd/transaction.c4
-rw-r--r--fs/jbd2/journal.c18
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/transaction.c22
-rw-r--r--fs/jffs2/acl.c141
-rw-r--r--fs/jffs2/acl.h7
-rw-r--r--fs/jffs2/dir.c1
-rw-r--r--fs/jffs2/file.c1
-rw-r--r--fs/jffs2/fs.c7
-rw-r--r--fs/jffs2/malloc.c4
-rw-r--r--fs/jffs2/nodelist.c28
-rw-r--r--fs/jffs2/readinode.c26
-rw-r--r--fs/jffs2/symlink.c1
-rw-r--r--fs/jffs2/xattr.c9
-rw-r--r--fs/jfs/acl.c107
-rw-r--r--fs/jfs/file.c4
-rw-r--r--fs/jfs/jfs_acl.h7
-rw-r--r--fs/jfs/jfs_logmgr.c12
-rw-r--r--fs/jfs/jfs_metapage.c9
-rw-r--r--fs/jfs/jfs_xattr.h2
-rw-r--r--fs/jfs/namei.c1
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/jfs/xattr.c123
-rw-r--r--fs/kernfs/Makefile5
-rw-r--r--fs/kernfs/dir.c1077
-rw-r--r--fs/kernfs/file.c867
-rw-r--r--fs/kernfs/inode.c377
-rw-r--r--fs/kernfs/kernfs-internal.h122
-rw-r--r--fs/kernfs/mount.c171
-rw-r--r--fs/kernfs/symlink.c151
-rw-r--r--fs/lockd/svclock.c8
-rw-r--r--fs/logfs/dev_bdev.c38
-rw-r--r--fs/logfs/segment.c3
-rw-r--r--fs/mount.h2
-rw-r--r--fs/mpage.c19
-rw-r--r--fs/namei.c91
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c43
-rw-r--r--fs/nfs/delegation.c11
-rw-r--r--fs/nfs/dir.c36
-rw-r--r--fs/nfs/direct.c279
-rw-r--r--fs/nfs/file.c6
-rw-r--r--fs/nfs/inode.c117
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/nfs3acl.c295
-rw-r--r--fs/nfs/nfs3proc.c77
-rw-r--r--fs/nfs/nfs3super.c3
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c29
-rw-r--r--fs/nfs/nfs4filelayout.c34
-rw-r--r--fs/nfs/nfs4filelayoutdev.c2
-rw-r--r--fs/nfs/nfs4namespace.c12
-rw-r--r--fs/nfs/nfs4proc.c106
-rw-r--r--fs/nfs/nfs4session.c25
-rw-r--r--fs/nfs/nfs4session.h2
-rw-r--r--fs/nfs/nfs4state.c23
-rw-r--r--fs/nfs/nfs4super.c14
-rw-r--r--fs/nfs/nfs4xdr.c49
-rw-r--r--fs/nfs/nfstrace.h1
-rw-r--r--fs/nfs/pnfs.c67
-rw-r--r--fs/nfs/pnfs.h16
-rw-r--r--fs/nfs/read.c12
-rw-r--r--fs/nfs/write.c26
-rw-r--r--fs/nfsd/acl.h18
-rw-r--r--fs/nfsd/cache.h8
-rw-r--r--fs/nfsd/idmap.h4
-rw-r--r--fs/nfsd/netns.h1
-rw-r--r--fs/nfsd/nfs2acl.c72
-rw-r--r--fs/nfsd/nfs3acl.c62
-rw-r--r--fs/nfsd/nfs3xdr.c14
-rw-r--r--fs/nfsd/nfs4acl.c139
-rw-r--r--fs/nfsd/nfs4idmap.c50
-rw-r--r--fs/nfsd/nfs4proc.c58
-rw-r--r--fs/nfsd/nfs4state.c40
-rw-r--r--fs/nfsd/nfs4xdr.c178
-rw-r--r--fs/nfsd/nfscache.c36
-rw-r--r--fs/nfsd/nfssvc.c30
-rw-r--r--fs/nfsd/nfsxdr.c2
-rw-r--r--fs/nfsd/vfs.c282
-rw-r--r--fs/nfsd/vfs.h10
-rw-r--r--fs/nfsd/xdr3.h3
-rw-r--r--fs/nfsd/xdr4.h4
-rw-r--r--fs/nilfs2/ioctl.c371
-rw-r--r--fs/nilfs2/segbuf.c3
-rw-r--r--fs/nilfs2/segment.c10
-rw-r--r--fs/nls/mac-celtic.c1
-rw-r--r--fs/nls/mac-centeuro.c1
-rw-r--r--fs/nls/mac-croatian.c1
-rw-r--r--fs/nls/mac-cyrillic.c1
-rw-r--r--fs/nls/mac-gaelic.c1
-rw-r--r--fs/nls/mac-greek.c1
-rw-r--r--fs/nls/mac-iceland.c1
-rw-r--r--fs/nls/mac-inuit.c1
-rw-r--r--fs/nls/mac-roman.c1
-rw-r--r--fs/nls/mac-romanian.c1
-rw-r--r--fs/nls/mac-turkish.c1
-rw-r--r--fs/nls/nls_ascii.c1
-rw-r--r--fs/nls/nls_base.c5
-rw-r--r--fs/nls/nls_cp1250.c1
-rw-r--r--fs/nls/nls_cp1251.c1
-rw-r--r--fs/nls/nls_cp1255.c1
-rw-r--r--fs/nls/nls_cp437.c1
-rw-r--r--fs/nls/nls_cp737.c1
-rw-r--r--fs/nls/nls_cp775.c1
-rw-r--r--fs/nls/nls_cp850.c1
-rw-r--r--fs/nls/nls_cp852.c1
-rw-r--r--fs/nls/nls_cp855.c1
-rw-r--r--fs/nls/nls_cp857.c1
-rw-r--r--fs/nls/nls_cp860.c1
-rw-r--r--fs/nls/nls_cp861.c1
-rw-r--r--fs/nls/nls_cp862.c1
-rw-r--r--fs/nls/nls_cp863.c1
-rw-r--r--fs/nls/nls_cp864.c1
-rw-r--r--fs/nls/nls_cp865.c1
-rw-r--r--fs/nls/nls_cp866.c1
-rw-r--r--fs/nls/nls_cp869.c1
-rw-r--r--fs/nls/nls_cp874.c1
-rw-r--r--fs/nls/nls_cp932.c1
-rw-r--r--fs/nls/nls_cp936.c1
-rw-r--r--fs/nls/nls_cp949.c1
-rw-r--r--fs/nls/nls_cp950.c1
-rw-r--r--fs/nls/nls_euc-jp.c1
-rw-r--r--fs/nls/nls_iso8859-1.c1
-rw-r--r--fs/nls/nls_iso8859-13.c1
-rw-r--r--fs/nls/nls_iso8859-14.c1
-rw-r--r--fs/nls/nls_iso8859-15.c1
-rw-r--r--fs/nls/nls_iso8859-2.c1
-rw-r--r--fs/nls/nls_iso8859-3.c1
-rw-r--r--fs/nls/nls_iso8859-4.c1
-rw-r--r--fs/nls/nls_iso8859-5.c1
-rw-r--r--fs/nls/nls_iso8859-6.c1
-rw-r--r--fs/nls/nls_iso8859-7.c1
-rw-r--r--fs/nls/nls_iso8859-9.c1
-rw-r--r--fs/nls/nls_koi8-r.c1
-rw-r--r--fs/nls/nls_koi8-ru.c1
-rw-r--r--fs/nls/nls_koi8-u.c1
-rw-r--r--fs/nls/nls_utf8.c1
-rw-r--r--fs/notify/dnotify/dnotify.c34
-rw-r--r--fs/notify/fanotify/fanotify.c234
-rw-r--r--fs/notify/fanotify/fanotify.h30
-rw-r--r--fs/notify/fanotify/fanotify_user.c63
-rw-r--r--fs/notify/fsnotify.c42
-rw-r--r--fs/notify/group.c7
-rw-r--r--fs/notify/inotify/inotify.h21
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c161
-rw-r--r--fs/notify/inotify/inotify_user.c131
-rw-r--r--fs/notify/notification.c358
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/acl.c234
-rw-r--r--fs/ocfs2/acl.h13
-rw-r--r--fs/ocfs2/alloc.c50
-rw-r--r--fs/ocfs2/cluster/Makefile2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/nodemanager.c4
-rw-r--r--fs/ocfs2/cluster/ver.c42
-rw-r--r--fs/ocfs2/cluster/ver.h31
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c5
-rw-r--r--fs/ocfs2/dlm/dlmver.c42
-rw-r--r--fs/ocfs2/dlm/dlmver.h31
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/dlmfs/dlmfsver.c42
-rw-r--r--fs/ocfs2/dlmfs/dlmfsver.h31
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/file.c67
-rw-r--r--fs/ocfs2/ioctl.c7
-rw-r--r--fs/ocfs2/localalloc.c42
-rw-r--r--fs/ocfs2/localalloc.h6
-rw-r--r--fs/ocfs2/move_extents.c77
-rw-r--r--fs/ocfs2/namei.c44
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/refcounttree.c19
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/stack_user.c308
-rw-r--r--fs/ocfs2/stackglue.c18
-rw-r--r--fs/ocfs2/stackglue.h15
-rw-r--r--fs/ocfs2/suballoc.c12
-rw-r--r--fs/ocfs2/suballoc.h12
-rw-r--r--fs/ocfs2/super.c20
-rw-r--r--fs/ocfs2/ver.c43
-rw-r--r--fs/ocfs2/ver.h31
-rw-r--r--fs/ocfs2/xattr.c21
-rw-r--r--fs/ocfs2/xattr.h6
-rw-r--r--fs/open.c4
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/posix_acl.c526
-rw-r--r--fs/proc/array.c18
-rw-r--r--fs/proc/base.c70
-rw-r--r--fs/proc/cmdline.c2
-rw-r--r--fs/proc/consoles.c2
-rw-r--r--fs/proc/cpuinfo.c2
-rw-r--r--fs/proc/devices.c2
-rw-r--r--fs/proc/generic.c3
-rw-r--r--fs/proc/interrupts.c2
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/loadavg.c2
-rw-r--r--fs/proc/meminfo.c39
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/page.c9
-rw-r--r--fs/proc/proc_devtree.c5
-rw-r--r--fs/proc/softirqs.c2
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/proc/uptime.c2
-rw-r--r--fs/proc/version.c2
-rw-r--r--fs/proc/vmcore.c28
-rw-r--r--fs/proc_namespace.c7
-rw-r--r--fs/pstore/platform.c7
-rw-r--r--fs/qnx4/inode.c63
-rw-r--r--fs/qnx4/qnx4.h2
-rw-r--r--fs/quota/dquot.c14
-rw-r--r--fs/ramfs/file-mmu.c7
-rw-r--r--fs/ramfs/file-nommu.c17
-rw-r--r--fs/ramfs/inode.c9
-rw-r--r--fs/ramfs/internal.h1
-rw-r--r--fs/read_write.c64
-rw-r--r--fs/reiserfs/acl.h4
-rw-r--r--fs/reiserfs/do_balan.c895
-rw-r--r--fs/reiserfs/file.c1
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/procfs.c4
-rw-r--r--fs/reiserfs/reiserfs.h10
-rw-r--r--fs/reiserfs/super.c8
-rw-r--r--fs/reiserfs/xattr.c5
-rw-r--r--fs/reiserfs/xattr_acl.c190
-rw-r--r--fs/romfs/super.c6
-rw-r--r--fs/splice.c18
-rw-r--r--fs/super.c6
-rw-r--r--fs/sync.c32
-rw-r--r--fs/sysfs/Makefile2
-rw-r--r--fs/sysfs/dir.c1075
-rw-r--r--fs/sysfs/file.c963
-rw-r--r--fs/sysfs/group.c102
-rw-r--r--fs/sysfs/inode.c331
-rw-r--r--fs/sysfs/mount.c185
-rw-r--r--fs/sysfs/symlink.c219
-rw-r--r--fs/sysfs/sysfs.h236
-rw-r--r--fs/ubifs/debug.c22
-rw-r--r--fs/ubifs/log.c21
-rw-r--r--fs/ubifs/orphan.c21
-rw-r--r--fs/ubifs/recovery.c21
-rw-r--r--fs/ubifs/super.c24
-rw-r--r--fs/ubifs/tnc.c22
-rw-r--r--fs/udf/file.c14
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/xattr_acl.c180
-rw-r--r--fs/xfs/xfs_acl.c151
-rw-r--r--fs/xfs/xfs_acl.h9
-rw-r--r--fs/xfs/xfs_aops.c4
-rw-r--r--fs/xfs/xfs_attr.c5
-rw-r--r--fs/xfs/xfs_attr_list.c8
-rw-r--r--fs/xfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/xfs_bmap.c36
-rw-r--r--fs/xfs/xfs_bmap_util.c50
-rw-r--r--fs/xfs/xfs_buf.c81
-rw-r--r--fs/xfs/xfs_buf.h31
-rw-r--r--fs/xfs/xfs_buf_item.c124
-rw-r--r--fs/xfs/xfs_dir2_node.c26
-rw-r--r--fs/xfs/xfs_dir2_readdir.c4
-rw-r--r--fs/xfs/xfs_dir2_sf.c58
-rw-r--r--fs/xfs/xfs_dquot.c7
-rw-r--r--fs/xfs/xfs_dquot_item.c67
-rw-r--r--fs/xfs/xfs_dquot_item.h3
-rw-r--r--fs/xfs/xfs_extfree_item.c21
-rw-r--r--fs/xfs/xfs_file.c19
-rw-r--r--fs/xfs/xfs_ialloc.c53
-rw-r--r--fs/xfs/xfs_ialloc.h21
-rw-r--r--fs/xfs/xfs_icreate_item.c10
-rw-r--r--fs/xfs/xfs_inode.c85
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_fork.c17
-rw-r--r--fs/xfs/xfs_inode_item.c400
-rw-r--r--fs/xfs/xfs_inode_item.h5
-rw-r--r--fs/xfs/xfs_ioctl.c6
-rw-r--r--fs/xfs/xfs_iops.c148
-rw-r--r--fs/xfs/xfs_iops.h2
-rw-r--r--fs/xfs/xfs_itable.c22
-rw-r--r--fs/xfs/xfs_log.h46
-rw-r--r--fs/xfs/xfs_log_cil.c89
-rw-r--r--fs/xfs/xfs_log_recover.c46
-rw-r--r--fs/xfs/xfs_mount.c24
-rw-r--r--fs/xfs/xfs_qm.c86
-rw-r--r--fs/xfs/xfs_qm.h18
-rw-r--r--fs/xfs/xfs_qm_syscalls.c18
-rw-r--r--fs/xfs/xfs_quota_priv.h42
-rw-r--r--fs/xfs/xfs_sb.c10
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c13
-rw-r--r--fs/xfs/xfs_trans_dquot.c4
-rw-r--r--fs/xfs/xfs_trans_resv.c10
-rw-r--r--fs/xfs/xfs_trans_space.h2
-rw-r--r--fs/xfs/xfs_vnode.h9
-rw-r--r--fs/xfs/xfs_xattr.c4
539 files changed, 18962 insertions, 15721 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 7af425f53bee..8482f2d11606 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -156,7 +156,7 @@ int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid)
return -EOPNOTSUPP;
acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
if (acl) {
- retval = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ retval = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
if (retval)
return retval;
set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
@@ -200,7 +200,7 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep,
if (acl) {
if (S_ISDIR(mode))
*dpacl = posix_acl_dup(acl);
- retval = posix_acl_create(&acl, GFP_NOFS, &mode);
+ retval = __posix_acl_create(&acl, GFP_NOFS, &mode);
if (retval < 0)
return retval;
if (retval > 0)
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 2b7a032c37bc..a69260f27555 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -239,13 +239,12 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode)
void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
{
struct v9fs_inode *v9inode = V9FS_I(inode);
- struct p9_fid *fid;
if (!v9inode->fscache)
return;
spin_lock(&v9inode->fscache_lock);
- fid = filp->private_data;
+
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
v9fs_cache_inode_flush_cookie(inode);
else
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 08f2e1e9a7e6..14da82564f4e 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -56,7 +56,7 @@ enum {
/* Options that take no arguments */
Opt_nodevmap,
/* Cache options */
- Opt_cache_loose, Opt_fscache,
+ Opt_cache_loose, Opt_fscache, Opt_mmap,
/* Access options */
Opt_access, Opt_posixacl,
/* Error token */
@@ -74,6 +74,7 @@ static const match_table_t tokens = {
{Opt_cache, "cache=%s"},
{Opt_cache_loose, "loose"},
{Opt_fscache, "fscache"},
+ {Opt_mmap, "mmap"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
{Opt_posixacl, "posixacl"},
@@ -91,6 +92,9 @@ static int get_cache_mode(char *s)
} else if (!strcmp(s, "fscache")) {
version = CACHE_FSCACHE;
p9_debug(P9_DEBUG_9P, "Cache mode: fscache\n");
+ } else if (!strcmp(s, "mmap")) {
+ version = CACHE_MMAP;
+ p9_debug(P9_DEBUG_9P, "Cache mode: mmap\n");
} else if (!strcmp(s, "none")) {
version = CACHE_NONE;
p9_debug(P9_DEBUG_9P, "Cache mode: none\n");
@@ -220,6 +224,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_fscache:
v9ses->cache = CACHE_FSCACHE;
break;
+ case Opt_mmap:
+ v9ses->cache = CACHE_MMAP;
+ break;
case Opt_cachetag:
#ifdef CONFIG_9P_FSCACHE
v9ses->cachetag = match_strdup(&args[0]);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a8e127c89627..099c7712631c 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -64,6 +64,7 @@ enum p9_session_flags {
enum p9_cache_modes {
CACHE_NONE,
+ CACHE_MMAP,
CACHE_LOOSE,
CACHE_FSCACHE,
};
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index dc95a252523d..b83ebfbf3fdc 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -50,6 +50,8 @@ extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
extern const struct file_operations v9fs_cached_file_operations;
extern const struct file_operations v9fs_cached_file_operations_dotl;
+extern const struct file_operations v9fs_mmap_file_operations;
+extern const struct file_operations v9fs_mmap_file_operations_dotl;
extern struct kmem_cache *v9fs_inode_cache;
struct inode *v9fs_alloc_inode(struct super_block *sb);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 9ff073f4090a..c71e88602ff4 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -202,6 +202,8 @@ static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
{
int retval;
+ p9_debug(P9_DEBUG_VFS, "page %p\n", page);
+
retval = v9fs_vfs_writepage_locked(page);
if (retval < 0) {
if (retval == -EAGAIN) {
@@ -282,6 +284,9 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = mapping->host;
+
+ p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
+
v9inode = V9FS_I(inode);
start:
page = grab_cache_page_write_begin(mapping, index, flags);
@@ -312,6 +317,8 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
loff_t last_pos = pos + copied;
struct inode *inode = page->mapping->host;
+ p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
+
if (unlikely(copied < len)) {
/*
* zero out the rest of the area
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a0df3e73c2b1..a16b0ff497ca 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -45,6 +45,7 @@
#include "cache.h"
static const struct vm_operations_struct v9fs_file_vm_ops;
+static const struct vm_operations_struct v9fs_mmap_file_vm_ops;
/**
* v9fs_file_open - open a file (or directory)
@@ -87,7 +88,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
file->private_data = fid;
mutex_lock(&v9inode->v_mutex);
- if (v9ses->cache && !v9inode->writeback_fid &&
+ if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
+ !v9inode->writeback_fid &&
((file->f_flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -105,7 +107,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9inode->writeback_fid = (void *) fid;
}
mutex_unlock(&v9inode->v_mutex);
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
v9fs_cache_inode_set_cookie(inode, file);
return 0;
out_error:
@@ -461,14 +463,12 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
int n;
loff_t i_size;
size_t total = 0;
- struct p9_client *clnt;
loff_t origin = *offset;
unsigned long pg_start, pg_end;
p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n",
data, (int)count, (int)*offset);
- clnt = fid->clnt;
do {
n = p9_client_write(fid, NULL, data+total, origin+total, count);
if (n <= 0)
@@ -581,11 +581,12 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
}
static int
-v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
+v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
int retval;
- retval = generic_file_mmap(file, vma);
+
+ retval = generic_file_mmap(filp, vma);
if (!retval)
vma->vm_ops = &v9fs_file_vm_ops;
@@ -593,6 +594,43 @@ v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
}
static int
+v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ int retval;
+ struct inode *inode;
+ struct v9fs_inode *v9inode;
+ struct p9_fid *fid;
+
+ inode = file_inode(filp);
+ v9inode = V9FS_I(inode);
+ mutex_lock(&v9inode->v_mutex);
+ if (!v9inode->writeback_fid &&
+ (vma->vm_flags & VM_WRITE)) {
+ /*
+ * Clone a fid and store it as writeback_fid.
+ * We do this at mmap time rather than at
+ * page-dirty time (write_begin/page_mkwrite)
+ * because we want the write-after-unlink
+ * use case to work.
+ */
+ fid = v9fs_writeback_fid(filp->f_path.dentry);
+ if (IS_ERR(fid)) {
+ retval = PTR_ERR(fid);
+ mutex_unlock(&v9inode->v_mutex);
+ return retval;
+ }
+ v9inode->writeback_fid = (void *) fid;
+ }
+ mutex_unlock(&v9inode->v_mutex);
+
+ retval = generic_file_mmap(filp, vma);
+ if (!retval)
+ vma->vm_ops = &v9fs_mmap_file_vm_ops;
+
+ return retval;
+}
+
+static int
v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct v9fs_inode *v9inode;
@@ -660,6 +698,22 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
return do_sync_read(filp, data, count, offset);
}
+/**
+ * v9fs_mmap_file_read - read from a file
+ * @filp: file pointer to read
+ * @data: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+static ssize_t
+v9fs_mmap_file_read(struct file *filp, char __user *data, size_t count,
+ loff_t *offset)
+{
+ /* TODO: Check if there are dirty pages */
+ return v9fs_file_read(filp, data, count, offset);
+}
+
static ssize_t
v9fs_direct_write(struct file *filp, const char __user * data,
size_t count, loff_t *offsetp)
@@ -730,12 +784,65 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
return do_sync_write(filp, data, count, offset);
}
+
+/**
+ * v9fs_mmap_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+static ssize_t
+v9fs_mmap_file_write(struct file *filp, const char __user *data,
+ size_t count, loff_t *offset)
+{
+ /*
+ * TODO: invalidate mmaps on filp's inode between
+ * offset and offset+count
+ */
+ return v9fs_file_write(filp, data, count, offset);
+}
+
+static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
+{
+ struct inode *inode;
+
+ struct writeback_control wbc = {
+ .nr_to_write = LONG_MAX,
+ .sync_mode = WB_SYNC_ALL,
+ .range_start = vma->vm_pgoff * PAGE_SIZE,
+ /* absolute end, byte at end included */
+ .range_end = vma->vm_pgoff * PAGE_SIZE +
+ (vma->vm_end - vma->vm_start - 1),
+ };
+
+
+ p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
+
+ inode = file_inode(vma->vm_file);
+
+ if (!mapping_cap_writeback_dirty(inode->i_mapping))
+ wbc.nr_to_write = 0;
+
+ might_sleep();
+ sync_inode(inode, &wbc);
+}
+
+
static const struct vm_operations_struct v9fs_file_vm_ops = {
.fault = filemap_fault,
.page_mkwrite = v9fs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
+static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
+ .close = v9fs_mmap_vm_close,
+ .fault = filemap_fault,
+ .page_mkwrite = v9fs_vm_page_mkwrite,
+ .remap_pages = generic_file_remap_pages,
+};
+
const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
@@ -786,3 +893,26 @@ const struct file_operations v9fs_file_operations_dotl = {
.mmap = generic_file_readonly_mmap,
.fsync = v9fs_file_fsync_dotl,
};
+
+const struct file_operations v9fs_mmap_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = v9fs_mmap_file_read,
+ .write = v9fs_mmap_file_write,
+ .open = v9fs_file_open,
+ .release = v9fs_dir_release,
+ .lock = v9fs_file_lock,
+ .mmap = v9fs_mmap_file_mmap,
+ .fsync = v9fs_file_fsync,
+};
+
+const struct file_operations v9fs_mmap_file_operations_dotl = {
+ .llseek = generic_file_llseek,
+ .read = v9fs_mmap_file_read,
+ .write = v9fs_mmap_file_write,
+ .open = v9fs_file_open,
+ .release = v9fs_dir_release,
+ .lock = v9fs_file_lock_dotl,
+ .flock = v9fs_file_flock_dotl,
+ .mmap = v9fs_mmap_file_mmap,
+ .fsync = v9fs_file_fsync_dotl,
+};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4e65aa903345..bb7991c7e5c7 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -299,15 +299,22 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
case S_IFREG:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE ||
+ v9ses->cache == CACHE_FSCACHE)
inode->i_fop =
&v9fs_cached_file_operations_dotl;
+ else if (v9ses->cache == CACHE_MMAP)
+ inode->i_fop = &v9fs_mmap_file_operations_dotl;
else
inode->i_fop = &v9fs_file_operations_dotl;
} else {
inode->i_op = &v9fs_file_inode_operations;
- if (v9ses->cache)
- inode->i_fop = &v9fs_cached_file_operations;
+ if (v9ses->cache == CACHE_LOOSE ||
+ v9ses->cache == CACHE_FSCACHE)
+ inode->i_fop =
+ &v9fs_cached_file_operations;
+ else if (v9ses->cache == CACHE_MMAP)
+ inode->i_fop = &v9fs_mmap_file_operations;
else
inode->i_fop = &v9fs_file_operations;
}
@@ -779,7 +786,6 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct dentry *res;
- struct super_block *sb;
struct v9fs_session_info *v9ses;
struct p9_fid *dfid, *fid;
struct inode *inode;
@@ -791,7 +797,6 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
- sb = dir->i_sb;
v9ses = v9fs_inode2v9ses(dir);
/* We can walk d_parent because we hold the dir->i_mutex */
dfid = v9fs_fid_lookup(dentry->d_parent);
@@ -812,7 +817,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
* unlink. For cached mode create calls request for new
* inode. But with cache disabled, lookup should do this.
*/
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
else
inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
@@ -863,7 +868,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
return finish_no_open(file, res);
err = 0;
- fid = NULL;
+
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
@@ -878,7 +883,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
v9fs_invalidate_inode_attr(dir);
v9inode = V9FS_I(dentry->d_inode);
mutex_lock(&v9inode->v_mutex);
- if (v9ses->cache && !v9inode->writeback_fid &&
+ if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
+ !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -901,7 +907,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
goto error;
file->private_data = fid;
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
v9fs_cache_inode_set_cookie(dentry->d_inode, file);
*opened |= FILE_CREATED;
@@ -1479,7 +1485,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
*/
i_size = inode->i_size;
v9fs_stat2inode(st, inode, inode->i_sb);
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
inode->i_size = i_size;
spin_unlock(&inode->i_lock);
out:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 4c10edec26a0..59dc8e87647f 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -330,7 +330,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
v9inode = V9FS_I(inode);
mutex_lock(&v9inode->v_mutex);
- if (v9ses->cache && !v9inode->writeback_fid &&
+ if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
+ !v9inode->writeback_fid &&
((flags & O_ACCMODE) != O_RDONLY)) {
/*
* clone a fid and add it to writeback_fid
@@ -353,7 +354,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
if (err)
goto err_clunk_old_fid;
file->private_data = ofid;
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
v9fs_cache_inode_set_cookie(inode, file);
*opened |= FILE_CREATED;
out:
@@ -473,13 +474,11 @@ static int
v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
- int err;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
struct p9_stat_dotl *st;
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
- err = -EPERM;
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
generic_fillattr(dentry->d_inode, stat);
@@ -556,7 +555,6 @@ static int v9fs_mapped_iattr_valid(int iattr_valid)
int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
{
int retval;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid;
struct p9_iattr_dotl p9attr;
struct inode *inode = dentry->d_inode;
@@ -577,8 +575,6 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
p9attr.mtime_sec = iattr->ia_mtime.tv_sec;
p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
- retval = -EPERM;
- v9ses = v9fs_dentry2v9ses(dentry);
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -715,7 +711,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
}
v9fs_invalidate_inode_attr(dir);
- if (v9ses->cache) {
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
/* Now walk from the parent so we can get an unopened fid. */
fid = p9_client_walk(dfid, 1, &name, 1);
if (IS_ERR(fid)) {
@@ -768,7 +764,6 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int err;
- char *name;
struct dentry *dir_dentry;
struct p9_fid *dfid, *oldfid;
struct v9fs_session_info *v9ses;
@@ -786,8 +781,6 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
if (IS_ERR(oldfid))
return PTR_ERR(oldfid);
- name = (char *) dentry->d_name.name;
-
err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
if (err < 0) {
@@ -973,7 +966,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
*/
i_size = inode->i_size;
v9fs_stat2inode_dotl(st, inode);
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
inode->i_size = i_size;
spin_unlock(&inode->i_lock);
out:
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 2756dcd5de6e..0afd0382822b 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -144,7 +144,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
}
v9fs_fill_super(sb, v9ses, flags, data);
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
sb->s_d_op = &v9fs_cached_dentry_operations;
else
sb->s_d_op = &v9fs_dentry_operations;
@@ -282,7 +282,7 @@ static int v9fs_drop_inode(struct inode *inode)
{
struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(inode);
- if (v9ses->cache)
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
return generic_drop_inode(inode);
/*
* in case of non cached mode always drop the
@@ -325,10 +325,12 @@ static int v9fs_write_inode_dotl(struct inode *inode,
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
- p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
v9inode = V9FS_I(inode);
+ p9_debug(P9_DEBUG_VFS, "%s: inode %p, writeback_fid %p\n",
+ __func__, inode, v9inode->writeback_fid);
if (!v9inode->writeback_fid)
return 0;
+
ret = p9_client_fsync(v9inode->writeback_fid, 0);
if (ret < 0) {
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 3c28cdfb8c47..04133a1fd9cb 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -138,8 +138,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
if (retval < 0) {
p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
retval);
- p9_client_clunk(fid);
- return retval;
+ goto err;
}
msize = fid->clnt->msize;
while (value_len) {
@@ -152,12 +151,15 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
if (write_count < 0) {
/* error in xattr write */
retval = write_count;
- break;
+ goto err;
}
offset += write_count;
value_len -= write_count;
}
- return p9_client_clunk(fid);
+ retval = offset;
+err:
+ p9_client_clunk(fid);
+ return retval;
}
ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
diff --git a/fs/Kconfig b/fs/Kconfig
index c229f828eb01..7385e54be4b9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -68,10 +68,6 @@ source "fs/quota/Kconfig"
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
-config GENERIC_ACL
- bool
- select FS_POSIX_ACL
-
menu "Caches"
source "fs/fscache/Kconfig"
@@ -119,7 +115,7 @@ config TMPFS_POSIX_ACL
bool "Tmpfs POSIX Access Control Lists"
depends on TMPFS
select TMPFS_XATTR
- select GENERIC_ACL
+ select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support additional access rights
for users and groups beyond the standard owner/group/world scheme,
diff --git a/fs/Makefile b/fs/Makefile
index 4fe6df3ec28f..47ac07bb4acc 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -42,9 +42,8 @@ obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
obj-$(CONFIG_FS_MBCACHE) += mbcache.o
-obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
+obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
-obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_COREDUMP) += coredump.o
obj-$(CONFIG_SYSCTL) += drop_caches.o
@@ -53,7 +52,7 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
-obj-$(CONFIG_SYSFS) += sysfs/
+obj-$(CONFIG_SYSFS) += sysfs/ kernfs/
obj-$(CONFIG_CONFIGFS_FS) += configfs/
obj-y += devpts/
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 45161a832bbc..d098731b82ff 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -49,11 +49,6 @@ affs_put_super(struct super_block *sb)
pr_debug("AFFS: put_super()\n");
cancel_delayed_work_sync(&sbi->sb_work);
- kfree(sbi->s_prefix);
- affs_free_bitmap(sb);
- affs_brelse(sbi->s_root_bh);
- kfree(sbi);
- sb->s_fs_info = NULL;
}
static int
@@ -316,7 +311,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
unsigned long mount_flags;
int tmp_flags; /* fix remount prototype... */
u8 sig[4];
- int ret = -EINVAL;
+ int ret;
save_mount_options(sb, data);
@@ -412,17 +407,19 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
if (!silent)
printk(KERN_ERR "AFFS: No valid root block on device %s\n",
sb->s_id);
- goto out_error;
+ return -EINVAL;
/* N.B. after this point bh must be released */
got_root:
+ /* Keep super block in cache */
+ sbi->s_root_bh = root_bh;
root_block = sbi->s_root_block;
/* Find out which kind of FS we have */
boot_bh = sb_bread(sb, 0);
if (!boot_bh) {
printk(KERN_ERR "AFFS: Cannot read boot block\n");
- goto out_error;
+ return -EINVAL;
}
memcpy(sig, boot_bh->b_data, 4);
brelse(boot_bh);
@@ -471,7 +468,7 @@ got_root:
default:
printk(KERN_ERR "AFFS: Unknown filesystem on device %s: %08X\n",
sb->s_id, chksum);
- goto out_error;
+ return -EINVAL;
}
if (mount_flags & SF_VERBOSE) {
@@ -488,22 +485,17 @@ got_root:
if (sbi->s_flags & SF_OFS)
sbi->s_data_blksize -= 24;
- /* Keep super block in cache */
- sbi->s_root_bh = root_bh;
- /* N.B. after this point s_root_bh must be released */
-
tmp_flags = sb->s_flags;
- if (affs_init_bitmap(sb, &tmp_flags))
- goto out_error;
+ ret = affs_init_bitmap(sb, &tmp_flags);
+ if (ret)
+ return ret;
sb->s_flags = tmp_flags;
/* set up enough so that it can read an inode */
root_inode = affs_iget(sb, root_block);
- if (IS_ERR(root_inode)) {
- ret = PTR_ERR(root_inode);
- goto out_error;
- }
+ if (IS_ERR(root_inode))
+ return PTR_ERR(root_inode);
if (AFFS_SB(sb)->s_flags & SF_INTL)
sb->s_d_op = &affs_intl_dentry_operations;
@@ -513,22 +505,11 @@ got_root:
sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
printk(KERN_ERR "AFFS: Get root inode failed\n");
- goto out_error;
+ return -ENOMEM;
}
pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
return 0;
-
- /*
- * Begin the cascaded cleanup ...
- */
-out_error:
- kfree(sbi->s_bitmap);
- affs_brelse(root_bh);
- kfree(sbi->s_prefix);
- kfree(sbi);
- sb->s_fs_info = NULL;
- return ret;
}
static int
@@ -615,11 +596,23 @@ static struct dentry *affs_mount(struct file_system_type *fs_type,
return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
}
+static void affs_kill_sb(struct super_block *sb)
+{
+ struct affs_sb_info *sbi = AFFS_SB(sb);
+ kill_block_super(sb);
+ if (sbi) {
+ affs_free_bitmap(sb);
+ affs_brelse(sbi->s_root_bh);
+ kfree(sbi->s_prefix);
+ kfree(sbi);
+ }
+}
+
static struct file_system_type affs_fs_type = {
.owner = THIS_MODULE,
.name = "affs",
.mount = affs_mount,
- .kill_sb = kill_block_super,
+ .kill_sb = affs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("affs");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a306bb6d88d9..6621f8008122 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -195,7 +195,6 @@ struct afs_cell {
struct list_head link; /* main cell list link */
struct key *anonymous_key; /* anonymous user key for this cell */
struct list_head proc_link; /* /proc cell list link */
- struct proc_dir_entry *proc_dir; /* /proc dir for this cell */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_cookie *cache; /* caching cookie */
#endif
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 526e4bbbde59..24a905b076fd 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -41,11 +41,8 @@ static const struct file_operations afs_proc_cells_fops = {
.write = afs_proc_cells_write,
.llseek = seq_lseek,
.release = seq_release,
- .owner = THIS_MODULE,
};
-static int afs_proc_rootcell_open(struct inode *inode, struct file *file);
-static int afs_proc_rootcell_release(struct inode *inode, struct file *file);
static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
size_t size, loff_t *_pos);
static ssize_t afs_proc_rootcell_write(struct file *file,
@@ -53,17 +50,12 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
size_t size, loff_t *_pos);
static const struct file_operations afs_proc_rootcell_fops = {
- .open = afs_proc_rootcell_open,
.read = afs_proc_rootcell_read,
.write = afs_proc_rootcell_write,
.llseek = no_llseek,
- .release = afs_proc_rootcell_release,
- .owner = THIS_MODULE,
};
static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
-static int afs_proc_cell_volumes_release(struct inode *inode,
- struct file *file);
static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -81,14 +73,11 @@ static const struct file_operations afs_proc_cell_volumes_fops = {
.open = afs_proc_cell_volumes_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = afs_proc_cell_volumes_release,
- .owner = THIS_MODULE,
+ .release = seq_release,
};
static int afs_proc_cell_vlservers_open(struct inode *inode,
struct file *file);
-static int afs_proc_cell_vlservers_release(struct inode *inode,
- struct file *file);
static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -106,13 +95,10 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
.open = afs_proc_cell_vlservers_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = afs_proc_cell_vlservers_release,
- .owner = THIS_MODULE,
+ .release = seq_release,
};
static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
-static int afs_proc_cell_servers_release(struct inode *inode,
- struct file *file);
static void *afs_proc_cell_servers_start(struct seq_file *p, loff_t *pos);
static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
loff_t *pos);
@@ -130,8 +116,7 @@ static const struct file_operations afs_proc_cell_servers_fops = {
.open = afs_proc_cell_servers_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = afs_proc_cell_servers_release,
- .owner = THIS_MODULE,
+ .release = seq_release,
};
/*
@@ -139,29 +124,21 @@ static const struct file_operations afs_proc_cell_servers_fops = {
*/
int afs_proc_init(void)
{
- struct proc_dir_entry *p;
-
_enter("");
proc_afs = proc_mkdir("fs/afs", NULL);
if (!proc_afs)
goto error_dir;
- p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
- if (!p)
- goto error_cells;
-
- p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
- if (!p)
- goto error_rootcell;
+ if (!proc_create("cells", 0644, proc_afs, &afs_proc_cells_fops) ||
+ !proc_create("rootcell", 0644, proc_afs, &afs_proc_rootcell_fops))
+ goto error_tree;
_leave(" = 0");
return 0;
-error_rootcell:
- remove_proc_entry("cells", proc_afs);
-error_cells:
- remove_proc_entry("fs/afs", NULL);
+error_tree:
+ remove_proc_subtree("fs/afs", NULL);
error_dir:
_leave(" = -ENOMEM");
return -ENOMEM;
@@ -172,9 +149,7 @@ error_dir:
*/
void afs_proc_cleanup(void)
{
- remove_proc_entry("rootcell", proc_afs);
- remove_proc_entry("cells", proc_afs);
- remove_proc_entry("fs/afs", NULL);
+ remove_proc_subtree("fs/afs", NULL);
}
/*
@@ -319,19 +294,6 @@ inval:
goto done;
}
-/*
- * Stubs for /proc/fs/afs/rootcell
- */
-static int afs_proc_rootcell_open(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
-static int afs_proc_rootcell_release(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
size_t size, loff_t *_pos)
{
@@ -387,38 +349,27 @@ nomem:
*/
int afs_proc_cell_setup(struct afs_cell *cell)
{
- struct proc_dir_entry *p;
+ struct proc_dir_entry *dir;
_enter("%p{%s}", cell, cell->name);
- cell->proc_dir = proc_mkdir(cell->name, proc_afs);
- if (!cell->proc_dir)
+ dir = proc_mkdir(cell->name, proc_afs);
+ if (!dir)
goto error_dir;
- p = proc_create_data("servers", 0, cell->proc_dir,
- &afs_proc_cell_servers_fops, cell);
- if (!p)
- goto error_servers;
-
- p = proc_create_data("vlservers", 0, cell->proc_dir,
- &afs_proc_cell_vlservers_fops, cell);
- if (!p)
- goto error_vlservers;
-
- p = proc_create_data("volumes", 0, cell->proc_dir,
- &afs_proc_cell_volumes_fops, cell);
- if (!p)
- goto error_volumes;
+ if (!proc_create_data("servers", 0, dir,
+ &afs_proc_cell_servers_fops, cell) ||
+ !proc_create_data("vlservers", 0, dir,
+ &afs_proc_cell_vlservers_fops, cell) ||
+ !proc_create_data("volumes", 0, dir,
+ &afs_proc_cell_volumes_fops, cell))
+ goto error_tree;
_leave(" = 0");
return 0;
-error_volumes:
- remove_proc_entry("vlservers", cell->proc_dir);
-error_vlservers:
- remove_proc_entry("servers", cell->proc_dir);
-error_servers:
- remove_proc_entry(cell->name, proc_afs);
+error_tree:
+ remove_proc_subtree(cell->name, proc_afs);
error_dir:
_leave(" = -ENOMEM");
return -ENOMEM;
@@ -431,10 +382,7 @@ void afs_proc_cell_remove(struct afs_cell *cell)
{
_enter("");
- remove_proc_entry("volumes", cell->proc_dir);
- remove_proc_entry("vlservers", cell->proc_dir);
- remove_proc_entry("servers", cell->proc_dir);
- remove_proc_entry(cell->name, proc_afs);
+ remove_proc_subtree(cell->name, proc_afs);
_leave("");
}
@@ -463,14 +411,6 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
}
/*
- * close the file and release the ref to the cell
- */
-static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
-{
- return seq_release(inode, file);
-}
-
-/*
* set up the iterator to start reading from the cells list and return the
* first item
*/
@@ -569,15 +509,6 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
}
/*
- * close the file and release the ref to the cell
- */
-static int afs_proc_cell_vlservers_release(struct inode *inode,
- struct file *file)
-{
- return seq_release(inode, file);
-}
-
-/*
* set up the iterator to start reading from the cells list and return the
* first item
*/
@@ -673,15 +604,6 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
}
/*
- * close the file and release the ref to the cell
- */
-static int afs_proc_cell_servers_release(struct inode *inode,
- struct file *file)
-{
- return seq_release(inode, file);
-}
-
-/*
* set up the iterator to start reading from the cells list and return the
* first item
*/
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6cb22e..062a5f6a1448 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
int i;
for (i = 0; i < ctx->nr_pages; i++) {
+ struct page *page;
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i]));
- put_page(ctx->ring_pages[i]);
+ page = ctx->ring_pages[i];
+ if (!page)
+ continue;
+ ctx->ring_pages[i] = NULL;
+ put_page(page);
}
put_aio_ring_file(ctx);
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
unsigned long flags;
int rc;
+ rc = 0;
+
+ /* Make sure the old page hasn't already been changed */
+ spin_lock(&mapping->private_lock);
+ ctx = mapping->private_data;
+ if (ctx) {
+ pgoff_t idx;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ idx = old->index;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ if (ctx->ring_pages[idx] != old)
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else
+ rc = -EINVAL;
+ spin_unlock(&mapping->private_lock);
+
+ if (rc != 0)
+ return rc;
+
/* Writeback must be complete */
BUG_ON(PageWriteback(old));
- put_page(old);
+ get_page(new);
- rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
- get_page(old);
+ put_page(new);
return rc;
}
- get_page(new);
-
/* We can potentially race against kioctx teardown here. Use the
* address_space's private data lock to protect the mapping's
* private_data.
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
spin_lock_irqsave(&ctx->completion_lock, flags);
migrate_page_copy(new, old);
idx = old->index;
- if (idx < (pgoff_t)ctx->nr_pages)
- ctx->ring_pages[idx] = new;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ /* And only do the move if things haven't changed */
+ if (ctx->ring_pages[idx] == old)
+ ctx->ring_pages[idx] = new;
+ else
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else
rc = -EBUSY;
spin_unlock(&mapping->private_lock);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_page(old);
+ else
+ put_page(new);
+
return rc;
}
#endif
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
struct aio_ring *ring;
unsigned nr_events = ctx->max_reqs;
struct mm_struct *mm = current->mm;
- unsigned long size, populate;
+ unsigned long size, unused;
int nr_pages;
int i;
struct file *file;
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx)
return -EAGAIN;
}
+ ctx->aio_ring_file = file;
+ nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+ / sizeof(struct io_event);
+
+ ctx->ring_pages = ctx->internal_pages;
+ if (nr_pages > AIO_RING_PAGES) {
+ ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!ctx->ring_pages) {
+ put_aio_ring_file(ctx);
+ return -ENOMEM;
+ }
+ }
+
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_inode->i_mapping,
@@ -358,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
SetPageUptodate(page);
SetPageDirty(page);
unlock_page(page);
+
+ ctx->ring_pages[i] = page;
}
- ctx->aio_ring_file = file;
- nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
- / sizeof(struct io_event);
+ ctx->nr_pages = i;
- ctx->ring_pages = ctx->internal_pages;
- if (nr_pages > AIO_RING_PAGES) {
- ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
- GFP_KERNEL);
- if (!ctx->ring_pages) {
- put_aio_ring_file(ctx);
- return -ENOMEM;
- }
+ if (unlikely(i != nr_pages)) {
+ aio_free_ring(ctx);
+ return -EAGAIN;
}
ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -379,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
down_write(&mm->mmap_sem);
ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, 0, &populate);
+ MAP_SHARED, 0, &unused);
+ up_write(&mm->mmap_sem);
if (IS_ERR((void *)ctx->mmap_base)) {
- up_write(&mm->mmap_sem);
ctx->mmap_size = 0;
aio_free_ring(ctx);
return -EAGAIN;
@@ -389,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
- /* We must do this while still holding mmap_sem for write, as we
- * need to be protected against userspace attempting to mremap()
- * or munmap() the ring buffer.
- */
- ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
- 1, 0, ctx->ring_pages, NULL);
-
- /* Dropping the reference here is safe as the page cache will hold
- * onto the pages for us. It is also required so that page migration
- * can unmap the pages and get the right reference count.
- */
- for (i = 0; i < ctx->nr_pages; i++)
- put_page(ctx->ring_pages[i]);
-
- up_write(&mm->mmap_sem);
-
- if (unlikely(ctx->nr_pages != nr_pages)) {
- aio_free_ring(ctx);
- return -EAGAIN;
- }
-
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
@@ -652,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
aio_nr += ctx->max_reqs;
spin_unlock(&aio_nr_lock);
- percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+ percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+ percpu_ref_get(&ctx->reqs); /* free_ioctx_users() will drop this */
err = ioctx_add_table(ctx, mm);
if (err)
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 24084732b1d0..80ef38c73e5a 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -41,19 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- struct dentry *root;
- root = mount_pseudo(fs_type, "anon_inode:", NULL,
+ return mount_pseudo(fs_type, "anon_inode:", NULL,
&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
- if (!IS_ERR(root)) {
- struct super_block *s = root->d_sb;
- anon_inode_inode = alloc_anon_inode(s);
- if (IS_ERR(anon_inode_inode)) {
- dput(root);
- deactivate_locked_super(s);
- root = ERR_CAST(anon_inode_inode);
- }
- }
- return root;
}
static struct file_system_type anon_inode_fs_type = {
@@ -175,22 +164,15 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
static int __init anon_inode_init(void)
{
- int error;
-
- error = register_filesystem(&anon_inode_fs_type);
- if (error)
- goto err_exit;
anon_inode_mnt = kern_mount(&anon_inode_fs_type);
- if (IS_ERR(anon_inode_mnt)) {
- error = PTR_ERR(anon_inode_mnt);
- goto err_unregister_filesystem;
- }
- return 0;
+ if (IS_ERR(anon_inode_mnt))
+ panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt));
-err_unregister_filesystem:
- unregister_filesystem(&anon_inode_fs_type);
-err_exit:
- panic(KERN_ERR "anon_inode_init() failed (%d)\n", error);
+ anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+ if (IS_ERR(anon_inode_inode))
+ panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+
+ return 0;
}
fs_initcall(anon_inode_init);
diff --git a/fs/attr.c b/fs/attr.c
index 267968d94673..5d4e59d56e85 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -202,11 +202,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
return -EPERM;
}
- if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) {
- if (attr->ia_size != inode->i_size)
- inode_inc_iversion(inode);
- }
-
if ((ia_valid & ATTR_MODE)) {
umode_t amode = attr->ia_mode;
/* Flag setting protected by i_mutex */
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 4218e26df916..acf32054edd8 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -104,7 +104,7 @@ struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
- pid_t oz_pgrp;
+ struct pid *oz_pgrp;
int catatonic;
int version;
int sub_version;
@@ -140,7 +140,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
filesystem without "magic".) */
static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
- return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
+ return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
}
/* Does a dentry have some pending activity? */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1818ce7f5a06..3182c0e68b42 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -346,6 +346,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
{
int pipefd;
int err = 0;
+ struct pid *new_pid = NULL;
if (param->setpipefd.pipefd == -1)
return -EINVAL;
@@ -357,7 +358,17 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
mutex_unlock(&sbi->wq_mutex);
return -EBUSY;
} else {
- struct file *pipe = fget(pipefd);
+ struct file *pipe;
+
+ new_pid = get_task_pid(current, PIDTYPE_PGID);
+
+ if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) {
+ AUTOFS_WARN("Not allowed to change PID namespace");
+ err = -EINVAL;
+ goto out;
+ }
+
+ pipe = fget(pipefd);
if (!pipe) {
err = -EBADF;
goto out;
@@ -367,12 +378,13 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
fput(pipe);
goto out;
}
- sbi->oz_pgrp = task_pgrp_nr(current);
+ swap(sbi->oz_pgrp, new_pid);
sbi->pipefd = pipefd;
sbi->pipe = pipe;
sbi->catatonic = 0;
}
out:
+ put_pid(new_pid);
mutex_unlock(&sbi->wq_mutex);
return err;
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3d9d3f5d5dda..394e90b02c5e 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -402,6 +402,20 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
goto next;
}
+ if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) {
+ DPRINTK("checking symlink %p %.*s",
+ dentry, (int)dentry->d_name.len, dentry->d_name.name);
+ /*
+ * A symlink can't be "busy" in the usual sense so
+ * just check last used for expire timeout.
+ */
+ if (autofs4_can_expire(dentry, timeout, do_now)) {
+ expired = dentry;
+ goto found;
+ }
+ goto next;
+ }
+
if (simple_empty(dentry))
goto next;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 3b9cc9b973c2..d7bd395ab586 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -56,8 +56,11 @@ void autofs4_kill_sb(struct super_block *sb)
* just call kill_anon_super when we are called from
* deactivate_super.
*/
- if (sbi) /* Free wait queues, close pipe */
+ if (sbi) {
+ /* Free wait queues, close pipe */
autofs4_catatonic_mode(sbi);
+ put_pid(sbi->oz_pgrp);
+ }
DPRINTK("shutting down");
kill_litter_super(sb);
@@ -80,7 +83,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
seq_printf(m, ",gid=%u",
from_kgid_munged(&init_user_ns, root_inode->i_gid));
- seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
+ seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp));
seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
seq_printf(m, ",minproto=%d", sbi->min_proto);
seq_printf(m, ",maxproto=%d", sbi->max_proto);
@@ -124,7 +127,8 @@ static const match_table_t tokens = {
};
static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
- pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
+ int *pgrp, bool *pgrp_set, unsigned int *type,
+ int *minproto, int *maxproto)
{
char *p;
substring_t args[MAX_OPT_ARGS];
@@ -132,7 +136,6 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
*uid = current_uid();
*gid = current_gid();
- *pgrp = task_pgrp_nr(current);
*minproto = AUTOFS_MIN_PROTO_VERSION;
*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -171,6 +174,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
if (match_int(args, &option))
return 1;
*pgrp = option;
+ *pgrp_set = true;
break;
case Opt_minproto:
if (match_int(args, &option))
@@ -206,10 +210,13 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
+ int pgrp;
+ bool pgrp_set = false;
+ int ret = -EINVAL;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- goto fail_unlock;
+ return -ENOMEM;
DPRINTK("starting up, sbi = %p",sbi);
s->s_fs_info = sbi;
@@ -218,7 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi->pipe = NULL;
sbi->catatonic = 1;
sbi->exp_timeout = 0;
- sbi->oz_pgrp = task_pgrp_nr(current);
+ sbi->oz_pgrp = NULL;
sbi->sb = s;
sbi->version = 0;
sbi->sub_version = 0;
@@ -243,8 +250,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
* Get the root inode and dentry, but defer checking for errors.
*/
ino = autofs4_new_ino(sbi);
- if (!ino)
+ if (!ino) {
+ ret = -ENOMEM;
goto fail_free;
+ }
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
root = d_make_root(root_inode);
if (!root)
@@ -255,12 +264,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
/* Can this call block? */
if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
- &sbi->oz_pgrp, &sbi->type, &sbi->min_proto,
- &sbi->max_proto)) {
+ &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto,
+ &sbi->max_proto)) {
printk("autofs: called with bogus options\n");
goto fail_dput;
}
+ if (pgrp_set) {
+ sbi->oz_pgrp = find_get_pid(pgrp);
+ if (!sbi->oz_pgrp) {
+ pr_warn("autofs: could not find process group %d\n",
+ pgrp);
+ goto fail_dput;
+ }
+ } else {
+ sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID);
+ }
+
if (autofs_type_trigger(sbi->type))
__managed_dentry_set_managed(root);
@@ -284,14 +304,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi->version = sbi->max_proto;
sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
- DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
+ DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp));
pipe = fget(pipefd);
-
+
if (!pipe) {
printk("autofs: could not open pipe file descriptor\n");
goto fail_dput;
}
- if (autofs_prepare_pipe(pipe) < 0)
+ ret = autofs_prepare_pipe(pipe);
+ if (ret < 0)
goto fail_fput;
sbi->pipe = pipe;
sbi->pipefd = pipefd;
@@ -316,10 +337,10 @@ fail_dput:
fail_ino:
kfree(ino);
fail_free:
+ put_pid(sbi->oz_pgrp);
kfree(sbi);
s->s_fs_info = NULL;
-fail_unlock:
- return -EINVAL;
+ return ret;
}
struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 92ef341ba0cf..2caf36ac3e93 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -558,7 +558,7 @@ static int autofs4_dir_symlink(struct inode *dir,
dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
- if (p_ino && dentry->d_parent != dentry)
+ if (p_ino && !IS_ROOT(dentry))
atomic_inc(&p_ino->count);
dir->i_mtime = CURRENT_TIME;
@@ -593,7 +593,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
- if (p_ino && dentry->d_parent != dentry)
+ if (p_ino && !IS_ROOT(dentry))
atomic_dec(&p_ino->count);
}
dput(ino->dentry);
@@ -732,7 +732,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m
dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
- if (p_ino && dentry->d_parent != dentry)
+ if (p_ino && !IS_ROOT(dentry))
atomic_inc(&p_ino->count);
inc_nlink(dir);
dir->i_mtime = CURRENT_TIME;
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index f27c094a1919..1e8ea192be2b 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -14,6 +14,10 @@
static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
{
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ if (ino && !autofs4_oz_mode(sbi))
+ ino->last_used = jiffies;
nd_set_link(nd, dentry->d_inode->i_private);
return NULL;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 689e40d983ad..116fd38ee472 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -347,11 +347,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
struct qstr qstr;
char *name;
int status, ret, type;
+ pid_t pid;
+ pid_t tgid;
/* In catatonic mode, we don't wait for nobody */
if (sbi->catatonic)
return -ENOENT;
+ /*
+ * Try translating pids to the namespace of the daemon.
+ *
+ * Zero means failure: we are in an unrelated pid namespace.
+ */
+ pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+ tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+ if (pid == 0 || tgid == 0)
+ return -ENOENT;
+
if (!dentry->d_inode) {
/*
* A wait for a negative dentry is invalid for certain
@@ -417,8 +429,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
wq->ino = autofs4_get_ino(sbi);
wq->uid = current_uid();
wq->gid = current_gid();
- wq->pid = current->pid;
- wq->tgid = current->tgid;
+ wq->pid = pid;
+ wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
wq->wait_ctr = 2;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index daa15d6ba450..845d2d690ce2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -324,8 +324,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
befs_debug(sb, "---> befs_read_inode() " "inode = %lu", ino);
inode = iget_locked(sb, ino);
- if (IS_ERR(inode))
- return inode;
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW))
return inode;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 571a42326908..67be2951b98a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -543,9 +543,6 @@ out:
* libraries. There is no binary dependent code anywhere else.
*/
-#define INTERPRETER_NONE 0
-#define INTERPRETER_ELF 2
-
#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
#endif
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index fc60b31453ee..4f70f383132c 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio)
}
EXPORT_SYMBOL(bio_integrity_free);
+static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
+{
+ if (bip->bip_slab == BIO_POOL_NONE)
+ return BIP_INLINE_VECS;
+
+ return bvec_nr_vecs(bip->bip_slab);
+}
+
/**
* bio_integrity_add_page - Attach integrity metadata
* @bio: bio to update
@@ -129,13 +137,12 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv;
- if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
+ if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0;
}
- iv = bip_vec_idx(bip, bip->bip_vcnt);
- BUG_ON(iv == NULL);
+ iv = bip->bip_vec + bip->bip_vcnt;
iv->bv_page = page;
iv->bv_len = len;
@@ -203,6 +210,12 @@ static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
return sectors;
}
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size;
+}
+
/**
* bio_integrity_tag_size - Retrieve integrity tag space
* @bio: bio to inspect
@@ -215,13 +228,14 @@ unsigned int bio_integrity_tag_size(struct bio *bio)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- BUG_ON(bio->bi_size == 0);
+ BUG_ON(bio->bi_iter.bi_size == 0);
- return bi->tag_size * (bio->bi_size / bi->sector_size);
+ return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size);
}
EXPORT_SYMBOL(bio_integrity_tag_size);
-int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
+static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
+ int set)
{
struct bio_integrity_payload *bip = bio->bi_integrity;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -235,9 +249,9 @@ int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
nr_sectors = bio_integrity_hw_sectors(bi,
DIV_ROUND_UP(len, bi->tag_size));
- if (nr_sectors * bi->tuple_size > bip->bip_size) {
- printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
- __func__, nr_sectors * bi->tuple_size, bip->bip_size);
+ if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
+ printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
+ nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
return -1;
}
@@ -299,29 +313,30 @@ static void bio_integrity_generate(struct bio *bio)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity_exchg bix;
- struct bio_vec *bv;
- sector_t sector = bio->bi_sector;
- unsigned int i, sectors, total;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ sector_t sector = bio->bi_iter.bi_sector;
+ unsigned int sectors, total;
void *prot_buf = bio->bi_integrity->bip_buf;
total = 0;
bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
bix.sector_size = bi->sector_size;
- bio_for_each_segment(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page);
- bix.data_buf = kaddr + bv->bv_offset;
- bix.data_size = bv->bv_len;
+ bio_for_each_segment(bv, bio, iter) {
+ void *kaddr = kmap_atomic(bv.bv_page);
+ bix.data_buf = kaddr + bv.bv_offset;
+ bix.data_size = bv.bv_len;
bix.prot_buf = prot_buf;
bix.sector = sector;
bi->generate_fn(&bix);
- sectors = bv->bv_len / bi->sector_size;
+ sectors = bv.bv_len / bi->sector_size;
sector += sectors;
prot_buf += sectors * bi->tuple_size;
total += sectors * bi->tuple_size;
- BUG_ON(total > bio->bi_integrity->bip_size);
+ BUG_ON(total > bio->bi_integrity->bip_iter.bi_size);
kunmap_atomic(kaddr);
}
@@ -386,8 +401,8 @@ int bio_integrity_prep(struct bio *bio)
bip->bip_owns_buf = 1;
bip->bip_buf = buf;
- bip->bip_size = len;
- bip->bip_sector = bio->bi_sector;
+ bip->bip_iter.bi_size = len;
+ bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
/* Map it */
offset = offset_in_page(buf);
@@ -442,16 +457,17 @@ static int bio_integrity_verify(struct bio *bio)
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity_exchg bix;
struct bio_vec *bv;
- sector_t sector = bio->bi_integrity->bip_sector;
- unsigned int i, sectors, total, ret;
+ sector_t sector = bio->bi_integrity->bip_iter.bi_sector;
+ unsigned int sectors, ret = 0;
void *prot_buf = bio->bi_integrity->bip_buf;
+ int i;
- ret = total = 0;
bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
bix.sector_size = bi->sector_size;
- bio_for_each_segment(bv, bio, i) {
+ bio_for_each_segment_all(bv, bio, i) {
void *kaddr = kmap_atomic(bv->bv_page);
+
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
bix.prot_buf = prot_buf;
@@ -467,8 +483,6 @@ static int bio_integrity_verify(struct bio *bio)
sectors = bv->bv_len / bi->sector_size;
sector += sectors;
prot_buf += sectors * bi->tuple_size;
- total += sectors * bi->tuple_size;
- BUG_ON(total > bio->bi_integrity->bip_size);
kunmap_atomic(kaddr);
}
@@ -495,7 +509,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
/* Restore original bio completion handler */
bio->bi_end_io = bip->bip_end_io;
- bio_endio(bio, error);
+ bio_endio_nodec(bio, error);
}
/**
@@ -533,56 +547,6 @@ void bio_integrity_endio(struct bio *bio, int error)
EXPORT_SYMBOL(bio_integrity_endio);
/**
- * bio_integrity_mark_head - Advance bip_vec skip bytes
- * @bip: Integrity vector to advance
- * @skip: Number of bytes to advance it
- */
-void bio_integrity_mark_head(struct bio_integrity_payload *bip,
- unsigned int skip)
-{
- struct bio_vec *iv;
- unsigned int i;
-
- bip_for_each_vec(iv, bip, i) {
- if (skip == 0) {
- bip->bip_idx = i;
- return;
- } else if (skip >= iv->bv_len) {
- skip -= iv->bv_len;
- } else { /* skip < iv->bv_len) */
- iv->bv_offset += skip;
- iv->bv_len -= skip;
- bip->bip_idx = i;
- return;
- }
- }
-}
-
-/**
- * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
- * @bip: Integrity vector to truncate
- * @len: New length of integrity vector
- */
-void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
- unsigned int len)
-{
- struct bio_vec *iv;
- unsigned int i;
-
- bip_for_each_vec(iv, bip, i) {
- if (len == 0) {
- bip->bip_vcnt = i;
- return;
- } else if (len >= iv->bv_len) {
- len -= iv->bv_len;
- } else { /* len < iv->bv_len) */
- iv->bv_len = len;
- len = 0;
- }
- }
-}
-
-/**
* bio_integrity_advance - Advance integrity vector
* @bio: bio whose integrity vector to update
* @bytes_done: number of data bytes that have been completed
@@ -595,13 +559,9 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
struct bio_integrity_payload *bip = bio->bi_integrity;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- unsigned int nr_sectors;
-
- BUG_ON(bip == NULL);
- BUG_ON(bi == NULL);
+ unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
- nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
- bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
+ bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
}
EXPORT_SYMBOL(bio_integrity_advance);
@@ -621,64 +581,13 @@ void bio_integrity_trim(struct bio *bio, unsigned int offset,
{
struct bio_integrity_payload *bip = bio->bi_integrity;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- unsigned int nr_sectors;
-
- BUG_ON(bip == NULL);
- BUG_ON(bi == NULL);
- BUG_ON(!bio_flagged(bio, BIO_CLONED));
- nr_sectors = bio_integrity_hw_sectors(bi, sectors);
- bip->bip_sector = bip->bip_sector + offset;
- bio_integrity_mark_head(bip, offset * bi->tuple_size);
- bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
+ bio_integrity_advance(bio, offset << 9);
+ bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
}
EXPORT_SYMBOL(bio_integrity_trim);
/**
- * bio_integrity_split - Split integrity metadata
- * @bio: Protected bio
- * @bp: Resulting bio_pair
- * @sectors: Offset
- *
- * Description: Splits an integrity page into a bio_pair.
- */
-void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
-{
- struct blk_integrity *bi;
- struct bio_integrity_payload *bip = bio->bi_integrity;
- unsigned int nr_sectors;
-
- if (bio_integrity(bio) == 0)
- return;
-
- bi = bdev_get_integrity(bio->bi_bdev);
- BUG_ON(bi == NULL);
- BUG_ON(bip->bip_vcnt != 1);
-
- nr_sectors = bio_integrity_hw_sectors(bi, sectors);
-
- bp->bio1.bi_integrity = &bp->bip1;
- bp->bio2.bi_integrity = &bp->bip2;
-
- bp->iv1 = bip->bip_vec[bip->bip_idx];
- bp->iv2 = bip->bip_vec[bip->bip_idx];
-
- bp->bip1.bip_vec = &bp->iv1;
- bp->bip2.bip_vec = &bp->iv2;
-
- bp->iv1.bv_len = sectors * bi->tuple_size;
- bp->iv2.bv_offset += sectors * bi->tuple_size;
- bp->iv2.bv_len -= sectors * bi->tuple_size;
-
- bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
- bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
-
- bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
- bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
-}
-EXPORT_SYMBOL(bio_integrity_split);
-
-/**
* bio_integrity_clone - Callback for cloning bios with integrity metadata
* @bio: New bio
* @bio_src: Original bio
@@ -702,9 +611,8 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
memcpy(bip->bip_vec, bip_src->bip_vec,
bip_src->bip_vcnt * sizeof(struct bio_vec));
- bip->bip_sector = bip_src->bip_sector;
bip->bip_vcnt = bip_src->bip_vcnt;
- bip->bip_idx = bip_src->bip_idx;
+ bip->bip_iter = bip_src->bip_iter;
return 0;
}
diff --git a/fs/bio.c b/fs/bio.c
index 33d79a4eb92d..8754e7b6eb49 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -38,8 +38,6 @@
*/
#define BIO_INLINE_VECS 4
-static mempool_t *bio_split_pool __read_mostly;
-
/*
* if you change this list, also change bvec_alloc or things will
* break badly! cannot be bigger than what you can fit into an
@@ -273,6 +271,7 @@ void bio_init(struct bio *bio)
{
memset(bio, 0, sizeof(*bio));
bio->bi_flags = 1 << BIO_UPTODATE;
+ atomic_set(&bio->bi_remaining, 1);
atomic_set(&bio->bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);
@@ -295,9 +294,35 @@ void bio_reset(struct bio *bio)
memset(bio, 0, BIO_RESET_BYTES);
bio->bi_flags = flags|(1 << BIO_UPTODATE);
+ atomic_set(&bio->bi_remaining, 1);
}
EXPORT_SYMBOL(bio_reset);
+static void bio_chain_endio(struct bio *bio, int error)
+{
+ bio_endio(bio->bi_private, error);
+ bio_put(bio);
+}
+
+/**
+ * bio_chain - chain bio completions
+ *
+ * The caller won't have a bi_end_io called when @bio completes - instead,
+ * @parent's bi_end_io won't be called until both @parent and @bio have
+ * completed; the chained bio will also be freed when it completes.
+ *
+ * The caller must not set bi_private or bi_end_io in @bio.
+ */
+void bio_chain(struct bio *bio, struct bio *parent)
+{
+ BUG_ON(bio->bi_private || bio->bi_end_io);
+
+ bio->bi_private = parent;
+ bio->bi_end_io = bio_chain_endio;
+ atomic_inc(&parent->bi_remaining);
+}
+EXPORT_SYMBOL(bio_chain);
+
static void bio_alloc_rescue(struct work_struct *work)
{
struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
@@ -473,13 +498,13 @@ EXPORT_SYMBOL(bio_alloc_bioset);
void zero_fill_bio(struct bio *bio)
{
unsigned long flags;
- struct bio_vec *bv;
- int i;
+ struct bio_vec bv;
+ struct bvec_iter iter;
- bio_for_each_segment(bv, bio, i) {
- char *data = bvec_kmap_irq(bv, &flags);
- memset(data, 0, bv->bv_len);
- flush_dcache_page(bv->bv_page);
+ bio_for_each_segment(bv, bio, iter) {
+ char *data = bvec_kmap_irq(&bv, &flags);
+ memset(data, 0, bv.bv_len);
+ flush_dcache_page(bv.bv_page);
bvec_kunmap_irq(data, &flags);
}
}
@@ -515,51 +540,49 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
EXPORT_SYMBOL(bio_phys_segments);
/**
- * __bio_clone - clone a bio
+ * __bio_clone_fast - clone a bio that shares the original bio's biovec
* @bio: destination bio
* @bio_src: bio to clone
*
* Clone a &bio. Caller will own the returned bio, but not
* the actual data it points to. Reference count of returned
* bio will be one.
+ *
+ * Caller must ensure that @bio_src is not freed before @bio.
*/
-void __bio_clone(struct bio *bio, struct bio *bio_src)
+void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
{
- memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
- bio_src->bi_max_vecs * sizeof(struct bio_vec));
+ BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
/*
* most users will be overriding ->bi_bdev with a new target,
* so we don't set nor calculate new physical/hw segment counts here
*/
- bio->bi_sector = bio_src->bi_sector;
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_flags |= 1 << BIO_CLONED;
bio->bi_rw = bio_src->bi_rw;
- bio->bi_vcnt = bio_src->bi_vcnt;
- bio->bi_size = bio_src->bi_size;
- bio->bi_idx = bio_src->bi_idx;
+ bio->bi_iter = bio_src->bi_iter;
+ bio->bi_io_vec = bio_src->bi_io_vec;
}
-EXPORT_SYMBOL(__bio_clone);
+EXPORT_SYMBOL(__bio_clone_fast);
/**
- * bio_clone_bioset - clone a bio
+ * bio_clone_fast - clone a bio that shares the original bio's biovec
* @bio: bio to clone
* @gfp_mask: allocation priority
* @bs: bio_set to allocate from
*
- * Like __bio_clone, only also allocates the returned bio
+ * Like __bio_clone_fast, only also allocates the returned bio
*/
-struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask,
- struct bio_set *bs)
+struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
{
struct bio *b;
- b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs);
+ b = bio_alloc_bioset(gfp_mask, 0, bs);
if (!b)
return NULL;
- __bio_clone(b, bio);
+ __bio_clone_fast(b, bio);
if (bio_integrity(bio)) {
int ret;
@@ -574,6 +597,79 @@ struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask,
return b;
}
+EXPORT_SYMBOL(bio_clone_fast);
+
+/**
+ * bio_clone_bioset - clone a bio
+ * @bio_src: bio to clone
+ * @gfp_mask: allocation priority
+ * @bs: bio_set to allocate from
+ *
+ * Clone bio. Caller will own the returned bio, but not the actual data it
+ * points to. Reference count of returned bio will be one.
+ */
+struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
+ struct bio_set *bs)
+{
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ struct bio *bio;
+
+ /*
+ * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
+ * bio_src->bi_io_vec to bio->bi_io_vec.
+ *
+ * We can't do that anymore, because:
+ *
+ * - The point of cloning the biovec is to produce a bio with a biovec
+ * the caller can modify: bi_idx and bi_bvec_done should be 0.
+ *
+ * - The original bio could've had more than BIO_MAX_PAGES biovecs; if
+ * we tried to clone the whole thing bio_alloc_bioset() would fail.
+ * But the clone should succeed as long as the number of biovecs we
+ * actually need to allocate is fewer than BIO_MAX_PAGES.
+ *
+ * - Lastly, bi_vcnt should not be looked at or relied upon by code
+ * that does not own the bio - reason being drivers don't use it for
+ * iterating over the biovec anymore, so expecting it to be kept up
+ * to date (i.e. for clones that share the parent biovec) is just
+ * asking for trouble and would force extra work on
+ * __bio_clone_fast() anyways.
+ */
+
+ bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
+ if (!bio)
+ return NULL;
+
+ bio->bi_bdev = bio_src->bi_bdev;
+ bio->bi_rw = bio_src->bi_rw;
+ bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
+ bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
+
+ if (bio->bi_rw & REQ_DISCARD)
+ goto integrity_clone;
+
+ if (bio->bi_rw & REQ_WRITE_SAME) {
+ bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
+ goto integrity_clone;
+ }
+
+ bio_for_each_segment(bv, bio_src, iter)
+ bio->bi_io_vec[bio->bi_vcnt++] = bv;
+
+integrity_clone:
+ if (bio_integrity(bio_src)) {
+ int ret;
+
+ ret = bio_integrity_clone(bio, bio_src, gfp_mask);
+ if (ret < 0) {
+ bio_put(bio);
+ return NULL;
+ }
+ }
+
+ return bio;
+}
EXPORT_SYMBOL(bio_clone_bioset);
/**
@@ -612,7 +708,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
if (unlikely(bio_flagged(bio, BIO_CLONED)))
return 0;
- if (((bio->bi_size + len) >> 9) > max_sectors)
+ if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
return 0;
/*
@@ -635,8 +731,9 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
simulate merging updated prev_bvec
as new bvec. */
.bi_bdev = bio->bi_bdev,
- .bi_sector = bio->bi_sector,
- .bi_size = bio->bi_size - prev_bv_len,
+ .bi_sector = bio->bi_iter.bi_sector,
+ .bi_size = bio->bi_iter.bi_size -
+ prev_bv_len,
.bi_rw = bio->bi_rw,
};
@@ -684,8 +781,8 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
if (q->merge_bvec_fn) {
struct bvec_merge_data bvm = {
.bi_bdev = bio->bi_bdev,
- .bi_sector = bio->bi_sector,
- .bi_size = bio->bi_size,
+ .bi_sector = bio->bi_iter.bi_sector,
+ .bi_size = bio->bi_iter.bi_size,
.bi_rw = bio->bi_rw,
};
@@ -708,7 +805,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
bio->bi_vcnt++;
bio->bi_phys_segments++;
done:
- bio->bi_size += len;
+ bio->bi_iter.bi_size += len;
return len;
}
@@ -807,28 +904,7 @@ void bio_advance(struct bio *bio, unsigned bytes)
if (bio_integrity(bio))
bio_integrity_advance(bio, bytes);
- bio->bi_sector += bytes >> 9;
- bio->bi_size -= bytes;
-
- if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
- return;
-
- while (bytes) {
- if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
- WARN_ONCE(1, "bio idx %d >= vcnt %d\n",
- bio->bi_idx, bio->bi_vcnt);
- break;
- }
-
- if (bytes >= bio_iovec(bio)->bv_len) {
- bytes -= bio_iovec(bio)->bv_len;
- bio->bi_idx++;
- } else {
- bio_iovec(bio)->bv_len -= bytes;
- bio_iovec(bio)->bv_offset += bytes;
- bytes = 0;
- }
- }
+ bio_advance_iter(bio, &bio->bi_iter, bytes);
}
EXPORT_SYMBOL(bio_advance);
@@ -874,117 +950,80 @@ EXPORT_SYMBOL(bio_alloc_pages);
*/
void bio_copy_data(struct bio *dst, struct bio *src)
{
- struct bio_vec *src_bv, *dst_bv;
- unsigned src_offset, dst_offset, bytes;
+ struct bvec_iter src_iter, dst_iter;
+ struct bio_vec src_bv, dst_bv;
void *src_p, *dst_p;
+ unsigned bytes;
- src_bv = bio_iovec(src);
- dst_bv = bio_iovec(dst);
-
- src_offset = src_bv->bv_offset;
- dst_offset = dst_bv->bv_offset;
+ src_iter = src->bi_iter;
+ dst_iter = dst->bi_iter;
while (1) {
- if (src_offset == src_bv->bv_offset + src_bv->bv_len) {
- src_bv++;
- if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) {
- src = src->bi_next;
- if (!src)
- break;
-
- src_bv = bio_iovec(src);
- }
+ if (!src_iter.bi_size) {
+ src = src->bi_next;
+ if (!src)
+ break;
- src_offset = src_bv->bv_offset;
+ src_iter = src->bi_iter;
}
- if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) {
- dst_bv++;
- if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) {
- dst = dst->bi_next;
- if (!dst)
- break;
-
- dst_bv = bio_iovec(dst);
- }
+ if (!dst_iter.bi_size) {
+ dst = dst->bi_next;
+ if (!dst)
+ break;
- dst_offset = dst_bv->bv_offset;
+ dst_iter = dst->bi_iter;
}
- bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset,
- src_bv->bv_offset + src_bv->bv_len - src_offset);
+ src_bv = bio_iter_iovec(src, src_iter);
+ dst_bv = bio_iter_iovec(dst, dst_iter);
- src_p = kmap_atomic(src_bv->bv_page);
- dst_p = kmap_atomic(dst_bv->bv_page);
+ bytes = min(src_bv.bv_len, dst_bv.bv_len);
- memcpy(dst_p + dst_offset,
- src_p + src_offset,
+ src_p = kmap_atomic(src_bv.bv_page);
+ dst_p = kmap_atomic(dst_bv.bv_page);
+
+ memcpy(dst_p + dst_bv.bv_offset,
+ src_p + src_bv.bv_offset,
bytes);
kunmap_atomic(dst_p);
kunmap_atomic(src_p);
- src_offset += bytes;
- dst_offset += bytes;
+ bio_advance_iter(src, &src_iter, bytes);
+ bio_advance_iter(dst, &dst_iter, bytes);
}
}
EXPORT_SYMBOL(bio_copy_data);
struct bio_map_data {
- struct bio_vec *iovecs;
- struct sg_iovec *sgvecs;
int nr_sgvecs;
int is_our_pages;
+ struct sg_iovec sgvecs[];
};
static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
struct sg_iovec *iov, int iov_count,
int is_our_pages)
{
- memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
bmd->nr_sgvecs = iov_count;
bmd->is_our_pages = is_our_pages;
bio->bi_private = bmd;
}
-static void bio_free_map_data(struct bio_map_data *bmd)
-{
- kfree(bmd->iovecs);
- kfree(bmd->sgvecs);
- kfree(bmd);
-}
-
static struct bio_map_data *bio_alloc_map_data(int nr_segs,
unsigned int iov_count,
gfp_t gfp_mask)
{
- struct bio_map_data *bmd;
-
if (iov_count > UIO_MAXIOV)
return NULL;
- bmd = kmalloc(sizeof(*bmd), gfp_mask);
- if (!bmd)
- return NULL;
-
- bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
- if (!bmd->iovecs) {
- kfree(bmd);
- return NULL;
- }
-
- bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
- if (bmd->sgvecs)
- return bmd;
-
- kfree(bmd->iovecs);
- kfree(bmd);
- return NULL;
+ return kmalloc(sizeof(struct bio_map_data) +
+ sizeof(struct sg_iovec) * iov_count, gfp_mask);
}
-static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
- struct sg_iovec *iov, int iov_count,
+static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
int to_user, int from_user, int do_free_page)
{
int ret = 0, i;
@@ -994,7 +1033,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
bio_for_each_segment_all(bvec, bio, i) {
char *bv_addr = page_address(bvec->bv_page);
- unsigned int bv_len = iovecs[i].bv_len;
+ unsigned int bv_len = bvec->bv_len;
while (bv_len && iov_idx < iov_count) {
unsigned int bytes;
@@ -1054,14 +1093,14 @@ int bio_uncopy_user(struct bio *bio)
* don't copy into a random user address space, just free.
*/
if (current->mm)
- ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
- bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+ ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
+ bio_data_dir(bio) == READ,
0, bmd->is_our_pages);
else if (bmd->is_our_pages)
bio_for_each_segment_all(bvec, bio, i)
__free_page(bvec->bv_page);
}
- bio_free_map_data(bmd);
+ kfree(bmd);
bio_put(bio);
return ret;
}
@@ -1175,7 +1214,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
*/
if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
(map_data && map_data->from_user)) {
- ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
+ ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
if (ret)
goto cleanup;
}
@@ -1189,7 +1228,7 @@ cleanup:
bio_put(bio);
out_bmd:
- bio_free_map_data(bmd);
+ kfree(bmd);
return ERR_PTR(ret);
}
@@ -1485,7 +1524,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
if (IS_ERR(bio))
return bio;
- if (bio->bi_size == len)
+ if (bio->bi_iter.bi_size == len)
return bio;
/*
@@ -1506,16 +1545,15 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
bio_for_each_segment_all(bvec, bio, i) {
char *addr = page_address(bvec->bv_page);
- int len = bmd->iovecs[i].bv_len;
if (read)
- memcpy(p, addr, len);
+ memcpy(p, addr, bvec->bv_len);
__free_page(bvec->bv_page);
- p += len;
+ p += bvec->bv_len;
}
- bio_free_map_data(bmd);
+ kfree(bmd);
bio_put(bio);
}
@@ -1686,11 +1724,11 @@ void bio_check_pages_dirty(struct bio *bio)
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
void bio_flush_dcache_pages(struct bio *bi)
{
- int i;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
- bio_for_each_segment(bvec, bi, i)
- flush_dcache_page(bvec->bv_page);
+ bio_for_each_segment(bvec, bi, iter)
+ flush_dcache_page(bvec.bv_page);
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
@@ -1711,96 +1749,86 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
**/
void bio_endio(struct bio *bio, int error)
{
- if (error)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- error = -EIO;
+ while (bio) {
+ BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
- if (bio->bi_end_io)
- bio->bi_end_io(bio, error);
-}
-EXPORT_SYMBOL(bio_endio);
+ if (error)
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ error = -EIO;
-void bio_pair_release(struct bio_pair *bp)
-{
- if (atomic_dec_and_test(&bp->cnt)) {
- struct bio *master = bp->bio1.bi_private;
+ if (!atomic_dec_and_test(&bio->bi_remaining))
+ return;
- bio_endio(master, bp->error);
- mempool_free(bp, bp->bio2.bi_private);
+ /*
+ * Need to have a real endio function for chained bios,
+ * otherwise various corner cases will break (like stacking
+ * block devices that save/restore bi_end_io) - however, we want
+ * to avoid unbounded recursion and blowing the stack. Tail call
+ * optimization would handle this, but compiling with frame
+ * pointers also disables gcc's sibling call optimization.
+ */
+ if (bio->bi_end_io == bio_chain_endio) {
+ struct bio *parent = bio->bi_private;
+ bio_put(bio);
+ bio = parent;
+ } else {
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio, error);
+ bio = NULL;
+ }
}
}
-EXPORT_SYMBOL(bio_pair_release);
-
-static void bio_pair_end_1(struct bio *bi, int err)
-{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
-
- if (err)
- bp->error = err;
-
- bio_pair_release(bp);
-}
+EXPORT_SYMBOL(bio_endio);
-static void bio_pair_end_2(struct bio *bi, int err)
+/**
+ * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
+ * @bio: bio
+ * @error: error, if any
+ *
+ * For code that has saved and restored bi_end_io; think hard before using this
+ * function, probably you should've cloned the entire bio.
+ **/
+void bio_endio_nodec(struct bio *bio, int error)
{
- struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
-
- if (err)
- bp->error = err;
-
- bio_pair_release(bp);
+ atomic_inc(&bio->bi_remaining);
+ bio_endio(bio, error);
}
+EXPORT_SYMBOL(bio_endio_nodec);
-/*
- * split a bio - only worry about a bio with a single page in its iovec
+/**
+ * bio_split - split a bio
+ * @bio: bio to split
+ * @sectors: number of sectors to split from the front of @bio
+ * @gfp: gfp mask
+ * @bs: bio set to allocate from
+ *
+ * Allocates and returns a new bio which represents @sectors from the start of
+ * @bio, and updates @bio to represent the remaining sectors.
+ *
+ * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
+ * responsibility to ensure that @bio is not freed before the split.
*/
-struct bio_pair *bio_split(struct bio *bi, int first_sectors)
+struct bio *bio_split(struct bio *bio, int sectors,
+ gfp_t gfp, struct bio_set *bs)
{
- struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
-
- if (!bp)
- return bp;
-
- trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
- bi->bi_sector + first_sectors);
-
- BUG_ON(bio_segments(bi) > 1);
- atomic_set(&bp->cnt, 3);
- bp->error = 0;
- bp->bio1 = *bi;
- bp->bio2 = *bi;
- bp->bio2.bi_sector += first_sectors;
- bp->bio2.bi_size -= first_sectors << 9;
- bp->bio1.bi_size = first_sectors << 9;
-
- if (bi->bi_vcnt != 0) {
- bp->bv1 = *bio_iovec(bi);
- bp->bv2 = *bio_iovec(bi);
-
- if (bio_is_rw(bi)) {
- bp->bv2.bv_offset += first_sectors << 9;
- bp->bv2.bv_len -= first_sectors << 9;
- bp->bv1.bv_len = first_sectors << 9;
- }
+ struct bio *split = NULL;
- bp->bio1.bi_io_vec = &bp->bv1;
- bp->bio2.bi_io_vec = &bp->bv2;
+ BUG_ON(sectors <= 0);
+ BUG_ON(sectors >= bio_sectors(bio));
- bp->bio1.bi_max_vecs = 1;
- bp->bio2.bi_max_vecs = 1;
- }
+ split = bio_clone_fast(bio, gfp, bs);
+ if (!split)
+ return NULL;
- bp->bio1.bi_end_io = bio_pair_end_1;
- bp->bio2.bi_end_io = bio_pair_end_2;
+ split->bi_iter.bi_size = sectors << 9;
- bp->bio1.bi_private = bi;
- bp->bio2.bi_private = bio_split_pool;
+ if (bio_integrity(split))
+ bio_integrity_trim(split, 0, sectors);
- if (bio_integrity(bi))
- bio_integrity_split(bi, bp, first_sectors);
+ bio_advance(bio, split->bi_iter.bi_size);
- return bp;
+ return split;
}
EXPORT_SYMBOL(bio_split);
@@ -1814,80 +1842,20 @@ void bio_trim(struct bio *bio, int offset, int size)
{
/* 'bio' is a cloned bio which we need to trim to match
* the given offset and size.
- * This requires adjusting bi_sector, bi_size, and bi_io_vec
*/
- int i;
- struct bio_vec *bvec;
- int sofar = 0;
size <<= 9;
- if (offset == 0 && size == bio->bi_size)
+ if (offset == 0 && size == bio->bi_iter.bi_size)
return;
clear_bit(BIO_SEG_VALID, &bio->bi_flags);
bio_advance(bio, offset << 9);
- bio->bi_size = size;
-
- /* avoid any complications with bi_idx being non-zero*/
- if (bio->bi_idx) {
- memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
- (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
- bio->bi_vcnt -= bio->bi_idx;
- bio->bi_idx = 0;
- }
- /* Make sure vcnt and last bv are not too big */
- bio_for_each_segment(bvec, bio, i) {
- if (sofar + bvec->bv_len > size)
- bvec->bv_len = size - sofar;
- if (bvec->bv_len == 0) {
- bio->bi_vcnt = i;
- break;
- }
- sofar += bvec->bv_len;
- }
+ bio->bi_iter.bi_size = size;
}
EXPORT_SYMBOL_GPL(bio_trim);
-/**
- * bio_sector_offset - Find hardware sector offset in bio
- * @bio: bio to inspect
- * @index: bio_vec index
- * @offset: offset in bv_page
- *
- * Return the number of hardware sectors between beginning of bio
- * and an end point indicated by a bio_vec index and an offset
- * within that vector's page.
- */
-sector_t bio_sector_offset(struct bio *bio, unsigned short index,
- unsigned int offset)
-{
- unsigned int sector_sz;
- struct bio_vec *bv;
- sector_t sectors;
- int i;
-
- sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
- sectors = 0;
-
- if (index >= bio->bi_idx)
- index = bio->bi_vcnt - 1;
-
- bio_for_each_segment_all(bv, bio, i) {
- if (i == index) {
- if (offset > bv->bv_offset)
- sectors += (offset - bv->bv_offset) / sector_sz;
- break;
- }
-
- sectors += bv->bv_len / sector_sz;
- }
-
- return sectors;
-}
-EXPORT_SYMBOL(bio_sector_offset);
-
/*
* create memory pools for biovec's in a bio_set.
* use the global biovec slabs created for general use.
@@ -2065,11 +2033,6 @@ static int __init init_bio(void)
if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
panic("bio: can't create integrity pool\n");
- bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
- sizeof(struct bio_pair));
- if (!bio_split_pool)
- panic("bio: can't create split pool\n");
-
return 0;
}
subsys_initcall(init_bio);
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index aa976eced2d2..a66768ebc8d1 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,7 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
- select LIBCRC32C
+ select CRYPTO
+ select CRYPTO_CRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 1a44e42d602a..f341a98031d2 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o
+ uuid-tree.o props.o hash.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 0890c83643e9..ff9b3995d453 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -35,13 +35,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
char *value = NULL;
struct posix_acl *acl;
- if (!IS_POSIXACL(inode))
- return NULL;
-
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
switch (type) {
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
@@ -76,31 +69,10 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
return acl;
}
-static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
- void *value, size_t size, int type)
-{
- struct posix_acl *acl;
- int ret = 0;
-
- if (!IS_POSIXACL(dentry->d_inode))
- return -EOPNOTSUPP;
-
- acl = btrfs_get_acl(dentry->d_inode, type);
-
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl == NULL)
- return -ENODATA;
- ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
- posix_acl_release(acl);
-
- return ret;
-}
-
/*
* Needs to be called with fs_mutex held
*/
-static int btrfs_set_acl(struct btrfs_trans_handle *trans,
+static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct posix_acl *acl, int type)
{
int ret, size = 0;
@@ -158,35 +130,9 @@ out:
return ret;
}
-static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
+int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- int ret;
- struct posix_acl *acl = NULL;
-
- if (!inode_owner_or_capable(dentry->d_inode))
- return -EPERM;
-
- if (!IS_POSIXACL(dentry->d_inode))
- return -EOPNOTSUPP;
-
- if (value) {
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
-
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret)
- goto out;
- }
- }
-
- ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-out:
- posix_acl_release(acl);
-
- return ret;
+ return __btrfs_set_acl(NULL, inode, acl, type);
}
/*
@@ -197,83 +143,31 @@ out:
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir)
{
- struct posix_acl *acl = NULL;
+ struct posix_acl *default_acl, *acl;
int ret = 0;
/* this happens with subvols */
if (!dir)
return 0;
- if (!S_ISLNK(inode->i_mode)) {
- if (IS_POSIXACL(dir)) {
- acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
+ ret = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ if (ret)
+ return ret;
- if (!acl)
- inode->i_mode &= ~current_umask();
+ if (default_acl) {
+ ret = __btrfs_set_acl(trans, inode, default_acl,
+ ACL_TYPE_DEFAULT);
+ posix_acl_release(default_acl);
}
- if (IS_POSIXACL(dir) && acl) {
- if (S_ISDIR(inode->i_mode)) {
- ret = btrfs_set_acl(trans, inode, acl,
- ACL_TYPE_DEFAULT);
- if (ret)
- goto failed;
- }
- ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
- if (ret < 0)
- return ret;
-
- if (ret > 0) {
- /* we need an acl */
- ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else if (ret < 0) {
- cache_no_acl(inode);
- }
- } else {
- cache_no_acl(inode);
+ if (acl) {
+ if (!ret)
+ ret = __btrfs_set_acl(trans, inode, acl,
+ ACL_TYPE_ACCESS);
+ posix_acl_release(acl);
}
-failed:
- posix_acl_release(acl);
-
- return ret;
-}
-int btrfs_acl_chmod(struct inode *inode)
-{
- struct posix_acl *acl;
- int ret = 0;
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
- if (!IS_POSIXACL(inode))
- return 0;
-
- acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR_OR_NULL(acl))
- return PTR_ERR(acl);
-
- ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
- if (ret)
- return ret;
- ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS);
- posix_acl_release(acl);
+ if (!default_acl && !acl)
+ cache_no_acl(inode);
return ret;
}
-
-const struct xattr_handler btrfs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .get = btrfs_xattr_acl_get,
- .set = btrfs_xattr_acl_set,
-};
-
-const struct xattr_handler btrfs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .get = btrfs_xattr_acl_get,
- .set = btrfs_xattr_acl_set,
-};
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3775947429b2..aded3ef3d3d4 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -66,6 +66,16 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
return 0;
}
+static void free_inode_elem_list(struct extent_inode_elem *eie)
+{
+ struct extent_inode_elem *eie_next;
+
+ for (; eie; eie = eie_next) {
+ eie_next = eie->next;
+ kfree(eie);
+ }
+}
+
static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
u64 extent_item_pos,
struct extent_inode_elem **eie)
@@ -209,18 +219,19 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
}
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
- struct ulist *parents, int level,
- struct btrfs_key *key_for_search, u64 time_seq,
- u64 wanted_disk_byte,
- const u64 *extent_item_pos)
+ struct ulist *parents, struct __prelim_ref *ref,
+ int level, u64 time_seq, const u64 *extent_item_pos)
{
int ret = 0;
int slot;
struct extent_buffer *eb;
struct btrfs_key key;
+ struct btrfs_key *key_for_search = &ref->key_for_search;
struct btrfs_file_extent_item *fi;
struct extent_inode_elem *eie = NULL, *old = NULL;
u64 disk_byte;
+ u64 wanted_disk_byte = ref->wanted_disk_byte;
+ u64 count = 0;
if (level != 0) {
eb = path->nodes[level];
@@ -238,7 +249,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
ret = btrfs_next_old_leaf(root, path, time_seq);
- while (!ret) {
+ while (!ret && count < ref->count) {
eb = path->nodes[0];
slot = path->slots[0];
@@ -254,6 +265,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
if (disk_byte == wanted_disk_byte) {
eie = NULL;
old = NULL;
+ count++;
if (extent_item_pos) {
ret = check_extent_in_eb(&key, eb, fi,
*extent_item_pos,
@@ -273,6 +285,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
old = old->next;
old->next = eie;
}
+ eie = NULL;
}
next:
ret = btrfs_next_old_item(root, path, time_seq);
@@ -280,6 +293,8 @@ next:
if (ret > 0)
ret = 0;
+ else if (ret < 0)
+ free_inode_elem_list(eie);
return ret;
}
@@ -299,23 +314,34 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
int ret = 0;
int root_level;
int level = ref->level;
+ int index;
root_key.objectid = ref->root_id;
root_key.type = BTRFS_ROOT_ITEM_KEY;
root_key.offset = (u64)-1;
+
+ index = srcu_read_lock(&fs_info->subvol_srcu);
+
root = btrfs_read_fs_root_no_name(fs_info, &root_key);
if (IS_ERR(root)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = PTR_ERR(root);
goto out;
}
root_level = btrfs_old_root_level(root, time_seq);
- if (root_level + 1 == level)
+ if (root_level + 1 == level) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
goto out;
+ }
path->lowest_level = level;
ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+
+ /* root node has been locked, we can release @subvol_srcu safely here */
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+
pr_debug("search slot in root %llu (level %d, ref count %d) returned "
"%d for key (%llu %u %llu)\n",
ref->root_id, level, ref->count, ret,
@@ -334,9 +360,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
eb = path->nodes[level];
}
- ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
- time_seq, ref->wanted_disk_byte,
- extent_item_pos);
+ ret = add_all_parents(root, path, parents, ref, level, time_seq,
+ extent_item_pos);
out:
path->lowest_level = 0;
btrfs_release_path(path);
@@ -376,10 +401,16 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
continue;
err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
parents, extent_item_pos);
- if (err == -ENOMEM)
- goto out;
- if (err)
+ /*
+ * we can only tolerate ENOENT,otherwise,we should catch error
+ * and return directly.
+ */
+ if (err == -ENOENT) {
continue;
+ } else if (err) {
+ ret = err;
+ goto out;
+ }
/* we put the first parent into the ref at hand */
ULIST_ITER_INIT(&uiter);
@@ -538,14 +569,13 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
if (extent_op && extent_op->update_key)
btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
- while ((n = rb_prev(n))) {
+ spin_lock(&head->lock);
+ n = rb_first(&head->ref_root);
+ while (n) {
struct btrfs_delayed_ref_node *node;
node = rb_entry(n, struct btrfs_delayed_ref_node,
rb_node);
- if (node->bytenr != head->node.bytenr)
- break;
- WARN_ON(node->is_head);
-
+ n = rb_next(n);
if (node->seq > seq)
continue;
@@ -612,10 +642,10 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
WARN_ON(1);
}
if (ret)
- return ret;
+ break;
}
-
- return 0;
+ spin_unlock(&head->lock);
+ return ret;
}
/*
@@ -828,6 +858,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct list_head prefs_delayed;
struct list_head prefs;
struct __prelim_ref *ref;
+ struct extent_inode_elem *eie = NULL;
INIT_LIST_HEAD(&prefs);
INIT_LIST_HEAD(&prefs_delayed);
@@ -882,15 +913,15 @@ again:
btrfs_put_delayed_ref(&head->node);
goto again;
}
+ spin_unlock(&delayed_refs->lock);
ret = __add_delayed_refs(head, time_seq,
&prefs_delayed);
mutex_unlock(&head->mutex);
- if (ret) {
- spin_unlock(&delayed_refs->lock);
+ if (ret)
goto out;
- }
+ } else {
+ spin_unlock(&delayed_refs->lock);
}
- spin_unlock(&delayed_refs->lock);
}
if (path->slots[0]) {
@@ -941,7 +972,6 @@ again:
goto out;
}
if (ref->count && ref->parent) {
- struct extent_inode_elem *eie = NULL;
if (extent_item_pos && !ref->inode_list) {
u32 bsz;
struct extent_buffer *eb;
@@ -976,6 +1006,7 @@ again:
eie = eie->next;
eie->next = ref->inode_list;
}
+ eie = NULL;
}
list_del(&ref->list);
kmem_cache_free(btrfs_prelim_ref_cache, ref);
@@ -994,7 +1025,8 @@ out:
list_del(&ref->list);
kmem_cache_free(btrfs_prelim_ref_cache, ref);
}
-
+ if (ret < 0)
+ free_inode_elem_list(eie);
return ret;
}
@@ -1002,7 +1034,6 @@ static void free_leaf_list(struct ulist *blocks)
{
struct ulist_node *node = NULL;
struct extent_inode_elem *eie;
- struct extent_inode_elem *eie_next;
struct ulist_iterator uiter;
ULIST_ITER_INIT(&uiter);
@@ -1010,10 +1041,7 @@ static void free_leaf_list(struct ulist *blocks)
if (!node->aux)
continue;
eie = (struct extent_inode_elem *)(uintptr_t)node->aux;
- for (; eie; eie = eie_next) {
- eie_next = eie->next;
- kfree(eie);
- }
+ free_inode_elem_list(eie);
node->aux = 0;
}
@@ -1101,44 +1129,13 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
if (!node)
break;
bytenr = node->val;
+ cond_resched();
}
ulist_free(tmp);
return 0;
}
-
-static int __inode_info(u64 inum, u64 ioff, u8 key_type,
- struct btrfs_root *fs_root, struct btrfs_path *path,
- struct btrfs_key *found_key)
-{
- int ret;
- struct btrfs_key key;
- struct extent_buffer *eb;
-
- key.type = key_type;
- key.objectid = inum;
- key.offset = ioff;
-
- ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
- if (ret < 0)
- return ret;
-
- eb = path->nodes[0];
- if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(fs_root, path);
- if (ret)
- return ret;
- eb = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
- if (found_key->type != key.type || found_key->objectid != key.objectid)
- return 1;
-
- return 0;
-}
-
/*
* this makes the path point to (inum INODE_ITEM ioff)
*/
@@ -1146,16 +1143,16 @@ int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
struct btrfs_path *path)
{
struct btrfs_key key;
- return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path,
- &key);
+ return btrfs_find_item(fs_root, path, inum, ioff,
+ BTRFS_INODE_ITEM_KEY, &key);
}
static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
struct btrfs_path *path,
struct btrfs_key *found_key)
{
- return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path,
- found_key);
+ return btrfs_find_item(fs_root, path, inum, ioff,
+ BTRFS_INODE_REF_KEY, found_key);
}
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
@@ -1335,20 +1332,45 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
- ret = btrfs_previous_item(fs_info->extent_root, path,
- 0, BTRFS_EXTENT_ITEM_KEY);
- if (ret < 0)
- return ret;
- btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
+ while (1) {
+ u32 nritems;
+ if (path->slots[0] == 0) {
+ btrfs_set_path_blocking(path);
+ ret = btrfs_prev_leaf(fs_info->extent_root, path);
+ if (ret != 0) {
+ if (ret > 0) {
+ pr_debug("logical %llu is not within "
+ "any extent\n", logical);
+ ret = -ENOENT;
+ }
+ return ret;
+ }
+ } else {
+ path->slots[0]--;
+ }
+ nritems = btrfs_header_nritems(path->nodes[0]);
+ if (nritems == 0) {
+ pr_debug("logical %llu is not within any extent\n",
+ logical);
+ return -ENOENT;
+ }
+ if (path->slots[0] == nritems)
+ path->slots[0]--;
+
+ btrfs_item_key_to_cpu(path->nodes[0], found_key,
+ path->slots[0]);
+ if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
+ found_key->type == BTRFS_METADATA_ITEM_KEY)
+ break;
+ }
+
if (found_key->type == BTRFS_METADATA_ITEM_KEY)
size = fs_info->extent_root->leafsize;
else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
size = found_key->offset;
- if ((found_key->type != BTRFS_EXTENT_ITEM_KEY &&
- found_key->type != BTRFS_METADATA_ITEM_KEY) ||
- found_key->objectid > logical ||
+ if (found_key->objectid > logical ||
found_key->objectid + size <= logical) {
pr_debug("logical %llu is not within any extent\n", logical);
return -ENOENT;
@@ -1601,7 +1623,6 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_key found_key;
while (!ret) {
- path->leave_spinning = 1;
ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
&found_key);
if (ret < 0)
@@ -1614,9 +1635,12 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
parent = found_key.offset;
slot = path->slots[0];
- eb = path->nodes[0];
- /* make sure we can use eb after releasing the path */
- atomic_inc(&eb->refs);
+ eb = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!eb) {
+ ret = -ENOMEM;
+ break;
+ }
+ extent_buffer_get(eb);
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
@@ -1674,17 +1698,20 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
++found;
slot = path->slots[0];
- eb = path->nodes[0];
- /* make sure we can use eb after releasing the path */
- atomic_inc(&eb->refs);
+ eb = btrfs_clone_extent_buffer(path->nodes[0]);
+ if (!eb) {
+ ret = -ENOMEM;
+ break;
+ }
+ extent_buffer_get(eb);
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ ptr = btrfs_item_ptr_offset(leaf, slot);
cur_offset = 0;
while (cur_offset < item_size) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ac0b39db27d1..8fed2125689e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -43,6 +43,7 @@
#define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
+#define BTRFS_INODE_HAS_PROPS 11
/* in memory btrfs inode */
struct btrfs_inode {
@@ -135,6 +136,9 @@ struct btrfs_inode {
*/
u64 index_cnt;
+ /* Cache the directory index number to speed the dir/file remove */
+ u64 dir_index;
+
/* the fsync log has some corner cases that mean we have to check
* directories to see if any unlinks have been done before
* the directory was logged. See tree-log.c for all the
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 131d82800b3a..0e8388e72d8d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -92,11 +92,11 @@
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
-#include <linux/crc32c.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
+#include "hash.h"
#include "transaction.h"
#include "extent_io.h"
#include "volumes.h"
@@ -1456,10 +1456,14 @@ static int btrfsic_handle_extent_data(
btrfsic_read_from_block_data(block_ctx, &file_extent_item,
file_extent_item_offset,
sizeof(struct btrfs_file_extent_item));
- next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
- btrfs_stack_file_extent_offset(&file_extent_item);
- generation = btrfs_stack_file_extent_generation(&file_extent_item);
- num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+ next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
+ if (btrfs_stack_file_extent_compression(&file_extent_item) ==
+ BTRFS_COMPRESS_NONE) {
+ next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
+ num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+ } else {
+ num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
+ }
generation = btrfs_stack_file_extent_generation(&file_extent_item);
if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
@@ -1695,7 +1699,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
return -1;
}
bio->bi_bdev = block_ctx->dev->bdev;
- bio->bi_sector = dev_bytenr >> 9;
+ bio->bi_iter.bi_sector = dev_bytenr >> 9;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1819,7 +1823,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
size_t sublen = i ? PAGE_CACHE_SIZE :
(PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
- crc = crc32c(crc, data, sublen);
+ crc = btrfs_crc32c(crc, data, sublen);
}
btrfs_csum_final(crc, csum);
if (memcmp(csum, h->csum, state->csum_size))
@@ -3013,7 +3017,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
int bio_is_patched;
char **mapped_datav;
- dev_bytenr = 512 * bio->bi_sector;
+ dev_bytenr = 512 * bio->bi_iter.bi_sector;
bio_is_patched = 0;
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
@@ -3021,8 +3025,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
"submit_bio(rw=0x%x, bi_vcnt=%u,"
" bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
rw, bio->bi_vcnt,
- (unsigned long long)bio->bi_sector, dev_bytenr,
- bio->bi_bdev);
+ (unsigned long long)bio->bi_iter.bi_sector,
+ dev_bytenr, bio->bi_bdev);
mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
GFP_NOFS);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1499b27b4186..b01fb6c527e3 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -128,11 +128,10 @@ static int check_compressed_csum(struct inode *inode,
kunmap_atomic(kaddr);
if (csum != *cb_sum) {
- printk(KERN_INFO "btrfs csum failed ino %llu "
- "extent %llu csum %u "
- "wanted %u mirror %d\n",
- btrfs_ino(inode), disk_start, csum, *cb_sum,
- cb->mirror_num);
+ btrfs_info(BTRFS_I(inode)->root->fs_info,
+ "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
+ btrfs_ino(inode), disk_start, csum, *cb_sum,
+ cb->mirror_num);
ret = -EIO;
goto fail;
}
@@ -172,7 +171,8 @@ static void end_compressed_bio_read(struct bio *bio, int err)
goto out;
inode = cb->inode;
- ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
+ ret = check_compressed_csum(inode, cb,
+ (u64)bio->bi_iter.bi_sector << 9);
if (ret)
goto csum_failed;
@@ -201,18 +201,16 @@ csum_failed:
if (cb->errors) {
bio_io_error(cb->orig_bio);
} else {
- int bio_index = 0;
- struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
+ int i;
+ struct bio_vec *bvec;
/*
* we have verified the checksum already, set page
* checked so the end_io handlers know about it
*/
- while (bio_index < cb->orig_bio->bi_vcnt) {
+ bio_for_each_segment_all(bvec, cb->orig_bio, i)
SetPageChecked(bvec->bv_page);
- bvec++;
- bio_index++;
- }
+
bio_endio(cb->orig_bio, 0);
}
@@ -372,7 +370,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
- if (bio->bi_size)
+ if (bio->bi_iter.bi_size)
ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
PAGE_CACHE_SIZE,
bio, 0);
@@ -412,7 +410,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
}
if (bytes_left < PAGE_CACHE_SIZE) {
- printk("bytes left %lu compress len %lu nr %lu\n",
+ btrfs_info(BTRFS_I(inode)->root->fs_info,
+ "bytes left %lu compress len %lu nr %lu",
bytes_left, cb->compressed_len, cb->nr_pages);
}
bytes_left -= PAGE_CACHE_SIZE;
@@ -506,7 +505,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (!em || last_offset < em->start ||
(last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
- (em->block_start >> 9) != cb->orig_bio->bi_sector) {
+ (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, last_offset, end);
unlock_page(page);
@@ -552,7 +551,7 @@ next:
* in it. We don't actually do IO on those pages but allocate new ones
* to hold the compressed pages on disk.
*
- * bio->bi_sector points to the compressed extent on disk
+ * bio->bi_iter.bi_sector points to the compressed extent on disk
* bio->bi_io_vec points to all of the inode pages
* bio->bi_vcnt is a count of pages
*
@@ -573,7 +572,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
struct page *page;
struct block_device *bdev;
struct bio *comp_bio;
- u64 cur_disk_byte = (u64)bio->bi_sector << 9;
+ u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
u64 em_len;
u64 em_start;
struct extent_map *em;
@@ -659,7 +658,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_CACHE_SHIFT;
- if (comp_bio->bi_size)
+ if (comp_bio->bi_iter.bi_size)
ret = tree->ops->merge_bio_hook(READ, page, 0,
PAGE_CACHE_SIZE,
comp_bio, 0);
@@ -687,8 +686,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio, sums);
BUG_ON(ret); /* -ENOMEM */
}
- sums += (comp_bio->bi_size + root->sectorsize - 1) /
- root->sectorsize;
+ sums += (comp_bio->bi_iter.bi_size +
+ root->sectorsize - 1) / root->sectorsize;
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
@@ -1011,6 +1010,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ if (*pg_index == (vcnt - 1) && *pg_offset == 0)
+ memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(kaddr);
flush_dcache_page(page_out);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 316136bd6dd7..cbd3a7d6fa68 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -39,9 +39,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
struct btrfs_path *btrfs_alloc_path(void)
{
@@ -475,6 +474,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* the index is the shifted logical of the *new* root node for root replace
* operations, or the shifted logical of the affected block for all other
* operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
*/
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -483,24 +484,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node **new;
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
- int ret = 0;
BUG_ON(!tm);
- tree_mod_log_write_lock(fs_info);
- if (list_empty(&fs_info->tree_mod_seq_list)) {
- tree_mod_log_write_unlock(fs_info);
- /*
- * Ok we no longer care about logging modifications, free up tm
- * and return 0. Any callers shouldn't be using tm after
- * calling tree_mod_log_insert, but if they do we can just
- * change this to return a special error code to let the callers
- * do their own thing.
- */
- kfree(tm);
- return 0;
- }
-
spin_lock(&fs_info->tree_mod_seq_lock);
tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
@@ -518,18 +504,13 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
new = &((*new)->rb_left);
else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
- else {
- ret = -EEXIST;
- kfree(tm);
- goto out;
- }
+ else
+ return -EEXIST;
}
rb_link_node(&tm->node, parent, new);
rb_insert_color(&tm->node, tm_root);
-out:
- tree_mod_log_write_unlock(fs_info);
- return ret;
+ return 0;
}
/*
@@ -545,19 +526,38 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
return 1;
if (eb && btrfs_header_level(eb) == 0)
return 1;
+
+ tree_mod_log_write_lock(fs_info);
+ if (list_empty(&(fs_info)->tree_mod_seq_list)) {
+ tree_mod_log_write_unlock(fs_info);
+ return 1;
+ }
+
return 0;
}
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb)
+{
+ smp_mb();
+ if (list_empty(&(fs_info)->tree_mod_seq_list))
+ return 0;
+ if (eb && btrfs_header_level(eb) == 0)
+ return 0;
+
+ return 1;
+}
+
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
struct tree_mod_elem *tm;
tm = kzalloc(sizeof(*tm), flags);
if (!tm)
- return -ENOMEM;
+ return NULL;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
if (op != MOD_LOG_KEY_ADD) {
@@ -567,8 +567,9 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
tm->op = op;
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
+ RB_CLEAR_NODE(&tm->node);
- return __tree_mod_log_insert(fs_info, tm);
+ return tm;
}
static noinline int
@@ -576,10 +577,27 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
- if (tree_mod_dont_log(fs_info, eb))
+ struct tree_mod_elem *tm;
+ int ret;
+
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
- return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+ tm = alloc_tree_mod_elem(eb, slot, op, flags);
+ if (!tm)
+ return -ENOMEM;
+
+ if (tree_mod_dont_log(fs_info, eb)) {
+ kfree(tm);
+ return 0;
+ }
+
+ ret = __tree_mod_log_insert(fs_info, tm);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ kfree(tm);
+
+ return ret;
}
static noinline int
@@ -587,53 +605,95 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items, gfp_t flags)
{
- struct tree_mod_elem *tm;
- int ret;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int ret = 0;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, eb))
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
+ tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->slot = src_slot;
+ tm->move.dst_slot = dst_slot;
+ tm->move.nr_items = nr_items;
+ tm->op = MOD_LOG_MOVE_KEYS;
+
+ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, eb))
+ goto free_tms;
+ locked = 1;
+
/*
* When we override something during the move, we log these removals.
* This can only happen when we move towards the beginning of the
* buffer, i.e. dst_slot < src_slot.
*/
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
- MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret)
+ goto free_tms;
}
- tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ ret = __tree_mod_log_insert(fs_info, tm);
+ if (ret)
+ goto free_tms;
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
- tm->slot = src_slot;
- tm->move.dst_slot = dst_slot;
- tm->move.nr_items = nr_items;
- tm->op = MOD_LOG_MOVE_KEYS;
+ return 0;
+free_tms:
+ for (i = 0; i < nr_items; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+ kfree(tm);
- return __tree_mod_log_insert(fs_info, tm);
+ return ret;
}
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct tree_mod_elem **tm_list,
+ int nritems)
{
- int i;
- u32 nritems;
+ int i, j;
int ret;
- if (btrfs_header_level(eb) == 0)
- return;
-
- nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i,
- MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret) {
+ for (j = nritems - 1; j > i; j--)
+ rb_erase(&tm_list[j]->node,
+ &fs_info->tree_mod_log);
+ return ret;
+ }
}
+
+ return 0;
}
static noinline int
@@ -642,17 +702,38 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *new_root, gfp_t flags,
int log_removal)
{
- struct tree_mod_elem *tm;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int ret = 0;
+ int i;
- if (tree_mod_dont_log(fs_info, NULL))
+ if (!tree_mod_need_log(fs_info, NULL))
return 0;
- if (log_removal)
- __tree_mod_log_free_eb(fs_info, old_root);
+ if (log_removal && btrfs_header_level(old_root) > 0) {
+ nritems = btrfs_header_nritems(old_root);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ flags);
+ if (!tm_list) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(old_root, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+ }
tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
@@ -660,7 +741,30 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->generation = btrfs_header_generation(old_root);
tm->op = MOD_LOG_ROOT_REPLACE;
- return __tree_mod_log_insert(fs_info, tm);
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+
+ if (tm_list)
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ if (!ret)
+ ret = __tree_mod_log_insert(fs_info, tm);
+
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return ret;
+
+free_tms:
+ if (tm_list) {
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+ }
+ kfree(tm);
+
+ return ret;
}
static struct tree_mod_elem *
@@ -729,31 +833,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
-static noinline void
+static noinline int
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
{
- int ret;
+ int ret = 0;
+ struct tree_mod_elem **tm_list = NULL;
+ struct tree_mod_elem **tm_list_add, **tm_list_rem;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, NULL))
- return;
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
- return;
+ return 0;
+
+ tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+ tm_list_add = tm_list;
+ tm_list_rem = tm_list + nr_items;
for (i = 0; i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, src,
- i + src_offset,
- MOD_LOG_KEY_REMOVE, GFP_NOFS);
- BUG_ON(ret < 0);
- ret = __tree_mod_log_insert_key(fs_info, dst,
- i + dst_offset,
- MOD_LOG_KEY_ADD,
- GFP_NOFS);
- BUG_ON(ret < 0);
+ tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
+ if (!tm_list_rem[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+ MOD_LOG_KEY_ADD, GFP_NOFS);
+ if (!tm_list_add[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
}
+
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+ locked = 1;
+
+ for (i = 0; i < nr_items; i++) {
+ ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+ if (ret)
+ goto free_tms;
+ ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+ if (ret)
+ goto free_tms;
+ }
+
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nr_items * 2; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return ret;
}
static inline void
@@ -772,18 +920,58 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
{
int ret;
- ret = __tree_mod_log_insert_key(fs_info, eb, slot,
+ ret = tree_mod_log_insert_key(fs_info, eb, slot,
MOD_LOG_KEY_REPLACE,
atomic ? GFP_ATOMIC : GFP_NOFS);
BUG_ON(ret < 0);
}
-static noinline void
+static noinline int
tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int i;
+ int ret = 0;
+
+ if (btrfs_header_level(eb) == 0)
+ return 0;
+
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
+
+ nritems = btrfs_header_nritems(eb);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
if (tree_mod_dont_log(fs_info, eb))
- return;
- __tree_mod_log_free_eb(fs_info, eb);
+ goto free_tms;
+
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+
+ return ret;
}
static noinline void
@@ -1041,8 +1229,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- if (last_ref)
- tree_mod_log_free_eb(root->fs_info, buf);
+ if (last_ref) {
+ ret = tree_mod_log_free_eb(root->fs_info, buf);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
+ }
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
@@ -1287,8 +1480,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
old = read_tree_block(root, logical, blocksize, 0);
if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
free_extent_buffer(old);
- pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
- logical);
+ btrfs_warn(root->fs_info,
+ "failed to read tree block %llu from get_old_root", logical);
} else {
eb = btrfs_clone_extent_buffer(old);
free_extent_buffer(old);
@@ -2462,6 +2655,49 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
return 0;
}
+/*
+ * Search @fs_root for the item keyed (iobjectid, key_type, ioff).
+ *
+ * @found_path: path to search with; when NULL a temporary path is allocated
+ *              here and freed again before returning.
+ * @found_key:  when non-NULL, receives the key of the item the search landed
+ *              on (after stepping to the next leaf if the slot is past the
+ *              end of the current one); when NULL the btrfs_search_slot()
+ *              result is returned as is.
+ *
+ * Returns < 0 on error, 0 if the item found has the requested objectid and
+ * type, and > 0 otherwise.
+ */
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+		u64 iobjectid, u64 ioff, u8 key_type,
+		struct btrfs_key *found_key)
+{
+	int ret;
+	struct btrfs_key key;
+	struct extent_buffer *eb;
+	struct btrfs_path *path;
+
+	key.type = key_type;
+	key.objectid = iobjectid;
+	key.offset = ioff;
+
+	if (found_path == NULL) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	} else
+		path = found_path;
+
+	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+	if ((ret < 0) || (found_key == NULL))
+		goto out;
+
+	eb = path->nodes[0];
+	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
+		/* slot is past the last item: look at the next leaf instead */
+		ret = btrfs_next_leaf(fs_root, path);
+		if (ret)
+			goto out;
+		eb = path->nodes[0];
+	}
+
+	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
+	if (found_key->type != key.type ||
+	    found_key->objectid != key.objectid)
+		ret = 1;
+	else
+		ret = 0;
+out:
+	/*
+	 * Every exit goes through here so that a path allocated above is
+	 * never leaked; a caller-supplied path is left for the caller.
+	 */
+	if (path != found_path)
+		btrfs_free_path(path);
+	return ret;
+}
+
/*
* look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf
@@ -2495,6 +2731,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
WARN_ON(p->nodes[0] != NULL);
+ BUG_ON(!cow && ins_len);
if (ins_len < 0) {
lowest_unlock = 2;
@@ -2603,8 +2840,6 @@ again:
}
}
cow_done:
- BUG_ON(!cow && ins_len);
-
p->nodes[level] = b;
btrfs_clear_path_blocking(p, NULL, 0);
@@ -2614,13 +2849,19 @@ cow_done:
* It is safe to drop the lock on our parent before we
* go through the expensive btree search on b.
*
- * If cow is true, then we might be changing slot zero,
- * which may require changing the parent. So, we can't
- * drop the lock until after we know which slot we're
- * operating on.
+ * If we're inserting or deleting (ins_len != 0), then we might
+ * be changing slot zero, which may require changing the parent.
+ * So, we can't drop the lock until after we know which slot
+ * we're operating on.
*/
- if (!cow)
- btrfs_unlock_up_safe(p, level + 1);
+ if (!ins_len && !p->keep_locks) {
+ int u = level + 1;
+
+ if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
+ btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
+ p->locks[u] = 0;
+ }
+ }
ret = key_search(b, key, level, &prev_cmp, &slot);
@@ -2648,7 +2889,7 @@ cow_done:
* which means we must have a write lock
* on the parent
*/
- if (slot == 0 && cow &&
+ if (slot == 0 && ins_len &&
write_lock_level < level + 1) {
write_lock_level = level + 1;
btrfs_release_path(p);
@@ -2901,7 +3142,9 @@ again:
if (ret < 0)
return ret;
if (!ret) {
- p->slots[0] = btrfs_header_nritems(leaf) - 1;
+ leaf = p->nodes[0];
+ if (p->slots[0] == btrfs_header_nritems(leaf))
+ p->slots[0]--;
return 0;
}
if (!return_any)
@@ -3022,8 +3265,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
} else
push_items = min(src_nritems - 8, push_items);
- tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
- push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+ push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(dst_nritems),
btrfs_node_key_ptr_offset(0),
@@ -3093,8 +3340,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
(dst_nritems) *
sizeof(struct btrfs_key_ptr));
- tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
- src_nritems - push_items, push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+ src_nritems - push_items, push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3295,7 +3546,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
- tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+ ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
+ mid, c_nritems - mid);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(split, c,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(mid),
@@ -3362,8 +3618,8 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
int ret;
ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
if (ret < 0) {
- printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
- "used %d nritems %d\n",
+ btrfs_crit(root->fs_info,
+ "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
leaf_space_used(leaf, 0, nritems), nritems);
}
@@ -3571,6 +3827,19 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
+ if (path->slots[0] == left_nritems && !empty) {
+ /* Key greater than all keys in the leaf, right neighbor has
+ * enough room for it and we're not emptying our leaf to delete
+ * it, therefore use right neighbor to insert the new item and
+ * no need to touch/dirty our left leaf. */
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ path->nodes[0] = right;
+ path->slots[0] = 0;
+ path->slots[1]++;
+ return 0;
+ }
+
return __push_leaf_right(trans, root, path, min_data_size, empty,
right, free_space, left_nritems, min_slot);
out_unlock:
@@ -3887,14 +4156,17 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
int progress = 0;
int slot;
u32 nritems;
+ int space_needed = data_size;
slot = path->slots[0];
+ if (slot < btrfs_header_nritems(path->nodes[0]))
+ space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
/*
* try to push all the items after our slot into the
* right leaf
*/
- ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -3914,7 +4186,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
/* try to push all the items before our slot into the next leaf */
slot = path->slots[0];
- ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -3958,13 +4230,18 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
/* first try to make some room by pushing left and right */
if (data_size && path->nodes[1]) {
- wret = push_leaf_right(trans, root, path, data_size,
- data_size, 0, 0);
+ int space_needed = data_size;
+
+ if (slot < btrfs_header_nritems(l))
+ space_needed -= btrfs_leaf_free_space(root, l);
+
+ wret = push_leaf_right(trans, root, path, space_needed,
+ space_needed, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size,
- data_size, 0, (u32)-1);
+ wret = push_leaf_left(trans, root, path, space_needed,
+ space_needed, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -4432,7 +4709,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
BUG_ON(slot < 0);
if (slot >= nritems) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d too large, nritems %d\n",
+ btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
slot, nritems);
BUG_ON(1);
}
@@ -4495,7 +4772,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
if (btrfs_leaf_free_space(root, leaf) < total_size) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "not enough freespace need %u have %d\n",
+ btrfs_crit(root->fs_info, "not enough freespace need %u have %d",
total_size, btrfs_leaf_free_space(root, leaf));
BUG();
}
@@ -4505,7 +4782,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
if (old_data < data_end) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
+ btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d",
slot, old_data, data_end);
BUG_ON(1);
}
@@ -4817,7 +5094,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* This may release the path, and so you may lose any locks held at the
* time you call it.
*/
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_disk_key found_key;
@@ -5240,7 +5517,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
if (!left_start_ctransid || !right_start_ctransid) {
WARN(1, KERN_WARNING
- "btrfs: btrfs_compare_tree detected "
+ "BTRFS: btrfs_compare_tree detected "
"a change in one of the trees while "
"iterating. This is probably a "
"bug.\n");
@@ -5680,3 +5957,46 @@ int btrfs_previous_item(struct btrfs_root *root,
}
return 1;
}
+
+/*
+ * search in extent tree to find a previous Metadata/Data extent item with
+ * min objectid.
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_extent_item(struct btrfs_root *root,
+			struct btrfs_path *path, u64 min_objectid)
+{
+	struct btrfs_key cur;
+	struct extent_buffer *eb;
+	u32 nr;
+	int ret;
+
+	for (;;) {
+		/* step back one slot, moving to the previous leaf at slot 0 */
+		if (path->slots[0] != 0) {
+			path->slots[0]--;
+		} else {
+			btrfs_set_path_blocking(path);
+			ret = btrfs_prev_leaf(root, path);
+			if (ret != 0)
+				return ret;
+		}
+
+		eb = path->nodes[0];
+		nr = btrfs_header_nritems(eb);
+		if (nr == 0)
+			return 1;
+		/* a slot one past the end is pulled back onto the last item */
+		if (path->slots[0] == nr)
+			path->slots[0]--;
+
+		btrfs_item_key_to_cpu(eb, &cur, path->slots[0]);
+
+		/* walked below the requested objectid: nothing to find */
+		if (cur.objectid < min_objectid)
+			return 1;
+
+		if (cur.type == BTRFS_EXTENT_ITEM_KEY ||
+		    cur.type == BTRFS_METADATA_ITEM_KEY)
+			return 0;
+
+		/* at min_objectid with a type below EXTENT_ITEM: give up */
+		if (cur.objectid == min_objectid &&
+		    cur.type < BTRFS_EXTENT_ITEM_KEY)
+			return 1;
+	}
+}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54ab86127f7a..2c1a42ca519f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -521,9 +521,15 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
+
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
@@ -532,7 +538,12 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
- BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_NO_HOLES)
+
+#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
+ (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL
/*
* A leaf is full of items. offset and size tell us where to find
@@ -1094,7 +1105,7 @@ struct btrfs_qgroup_limit_item {
} __attribute__ ((__packed__));
struct btrfs_space_info {
- u64 flags;
+ spinlock_t lock;
u64 total_bytes; /* total bytes in the space,
this doesn't take mirrors into account */
@@ -1104,14 +1115,25 @@ struct btrfs_space_info {
transaction finishes */
u64 bytes_reserved; /* total bytes the allocator has reserved for
current allocations */
- u64 bytes_readonly; /* total bytes that are read only */
-
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
+ u64 bytes_readonly; /* total bytes that are read only */
+
+ unsigned int full:1; /* indicates that we cannot allocate any more
+ chunks for this space */
+ unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
+
+ unsigned int flush:1; /* set if we are trying to make space */
+
+ unsigned int force_alloc; /* set if we need to force a chunk
+ alloc for this space */
+
u64 disk_used; /* total bytes used on disk */
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ u64 flags;
+
/*
* bytes_pinned is kept in line with what is actually pinned, as in
* we've called update_block_group and dropped the bytes_used counter
@@ -1124,22 +1146,15 @@ struct btrfs_space_info {
*/
struct percpu_counter total_bytes_pinned;
- unsigned int full:1; /* indicates that we cannot allocate any more
- chunks for this space */
- unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
-
- unsigned int flush:1; /* set if we are trying to make space */
-
- unsigned int force_alloc; /* set if we need to force a chunk
- alloc for this space */
-
struct list_head list;
+ struct rw_semaphore groups_sem;
/* for block groups in our same type */
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
- spinlock_t lock;
- struct rw_semaphore groups_sem;
wait_queue_head_t wait;
+
+ struct kobject kobj;
+ struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
};
#define BTRFS_BLOCK_RSV_GLOBAL 1
@@ -1346,6 +1361,7 @@ struct btrfs_fs_info {
u64 generation;
u64 last_trans_committed;
+ u64 avg_delayed_ref_runtime;
/*
* this is updated to the current trans every time a full commit
@@ -1448,7 +1464,6 @@ struct btrfs_fs_info {
spinlock_t tree_mod_seq_lock;
atomic64_t tree_mod_seq;
struct list_head tree_mod_seq_list;
- struct seq_list tree_mod_seq_elem;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
@@ -1515,6 +1530,8 @@ struct btrfs_fs_info {
int thread_pool_size;
struct kobject super_kobj;
+ struct kobject *space_info_kobj;
+ struct kobject *device_dir_kobj;
struct completion kobj_unregister;
int do_barriers;
int closing;
@@ -1643,6 +1660,10 @@ struct btrfs_fs_info {
spinlock_t reada_lock;
struct radix_tree_root reada_tree;
+ /* Extent buffer radix tree */
+ spinlock_t buffer_lock;
+ struct radix_tree_root buffer_radix;
+
/* next backup root to be overwritten */
int backup_root_index;
@@ -1795,6 +1816,12 @@ struct btrfs_root {
struct list_head ordered_extents;
struct list_head ordered_root;
u64 nr_ordered_extents;
+
+ /*
+ * Number of currently running SEND ioctls to prevent
+ * manipulation with the read-only status via SUBVOL_SETFLAGS
+ */
+ int send_in_progress;
};
struct btrfs_ioctl_defrag_range_args {
@@ -1997,6 +2024,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
+#define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
@@ -2925,6 +2953,10 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
struct btrfs_file_extent_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
struct btrfs_file_extent_item, num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes,
+ struct btrfs_file_extent_item, disk_num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
+ struct btrfs_file_extent_item, compression, 8);
static inline unsigned long
btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -2958,15 +2990,6 @@ BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
other_encoding, 16);
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
- struct btrfs_file_extent_item *e)
-{
- return btrfs_file_extent_ram_bytes(eb, e);
-}
-
/*
* this returns the number of bytes used by the item on disk, minus the
* size of any extent headers. If a file is compressed on disk, this is
@@ -2980,6 +3003,32 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
return btrfs_item_size(eb, e) - offset;
}
+/*
+ * Number of file bytes represented by the inline extent item at @slot.
+ *
+ * If none of the compression, encryption and other_encoding fields is set,
+ * this is the item's inline length on disk; otherwise it is the ram_bytes
+ * field (the uncompressed size).
+ */
+static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
+					       int slot,
+					       struct btrfs_file_extent_item *fi)
+{
+	struct btrfs_map_token token;
+	int encoded;
+
+	btrfs_init_map_token(&token);
+
+	encoded = btrfs_token_file_extent_compression(eb, fi, &token) != 0 ||
+		  btrfs_token_file_extent_encryption(eb, fi, &token) != 0 ||
+		  btrfs_token_file_extent_other_encoding(eb, fi, &token) != 0;
+
+	/* compressed or encoded: use the ram bytes field */
+	if (encoded)
+		return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
+
+	/* plain data: the space used on disk is the file length */
+	return btrfs_file_extent_inline_item_len(eb, btrfs_item_nr(slot));
+}
+
+
/* btrfs_dev_stats_item */
static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
struct btrfs_dev_stats_item *ptr,
@@ -3143,6 +3192,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
@@ -3163,6 +3214,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
u64 bytenr);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
+int get_block_group_index(struct btrfs_block_group_cache *cache);
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
@@ -3301,6 +3353,8 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
+int btrfs_previous_extent_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid);
void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
@@ -3350,6 +3404,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *new_key);
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
+ u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow);
@@ -3399,6 +3455,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
}
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3563,12 +3620,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 *index);
-int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, int mod,
- u64 *ret_index);
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
@@ -3676,7 +3727,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, u64 new_dirid);
+ struct btrfs_root *new_root,
+ struct btrfs_root *parent_root,
+ u64 new_dirid);
int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
@@ -3745,7 +3798,10 @@ extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct btrfs_path *path, u64 start, u64 end,
- u64 *drop_end, int drop_cache);
+ u64 *drop_end, int drop_cache,
+ int replace_extent,
+ u32 extent_item_size,
+ int *key_inserted);
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, u64 start,
u64 end, int drop_cache);
@@ -3764,6 +3820,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
/* sysfs.c */
int btrfs_init_sysfs(void);
void btrfs_exit_sysfs(void);
+int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info);
+void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info);
/* xattr.c */
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -3796,14 +3854,20 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
#define btrfs_info(fs_info, fmt, args...) \
btrfs_printk(fs_info, KERN_INFO fmt, ##args)
+
+#ifdef DEBUG
#define btrfs_debug(fs_info, fmt, args...) \
btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+#else
+#define btrfs_debug(fs_info, fmt, args...) \
+ no_printk(KERN_DEBUG fmt, ##args)
+#endif
#ifdef CONFIG_BTRFS_ASSERT
static inline void assfail(char *expr, char *file, int line)
{
- printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d",
+ pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
expr, file, line);
BUG();
}
@@ -3841,7 +3905,7 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
if (!(features & flag)) {
features |= flag;
btrfs_set_super_incompat_flags(disk_super, features);
- printk(KERN_INFO "btrfs: setting %llu feature flag\n",
+ btrfs_info(fs_info, "setting %llu feature flag",
flag);
}
spin_unlock(&fs_info->super_lock);
@@ -3899,20 +3963,17 @@ do { \
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
+int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir);
-int btrfs_acl_chmod(struct inode *inode);
#else
#define btrfs_get_acl NULL
+#define btrfs_set_acl NULL
static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir)
{
return 0;
}
-static inline int btrfs_acl_chmod(struct inode *inode)
-{
- return 0;
-}
#endif
/* relocation.c */
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 8d292fbae659..451b00c86f6c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -55,8 +55,7 @@ static inline void btrfs_init_delayed_node(
delayed_node->inode_id = inode_id;
atomic_set(&delayed_node->refs, 0);
delayed_node->count = 0;
- delayed_node->in_list = 0;
- delayed_node->inode_dirty = 0;
+ delayed_node->flags = 0;
delayed_node->ins_root = RB_ROOT;
delayed_node->del_root = RB_ROOT;
mutex_init(&delayed_node->mutex);
@@ -172,7 +171,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
int mod)
{
spin_lock(&root->lock);
- if (node->in_list) {
+ if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
if (!list_empty(&node->p_list))
list_move_tail(&node->p_list, &root->prepare_list);
else if (mod)
@@ -182,7 +181,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
list_add_tail(&node->p_list, &root->prepare_list);
atomic_inc(&node->refs); /* inserted into list */
root->nodes++;
- node->in_list = 1;
+ set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
}
spin_unlock(&root->lock);
}
@@ -192,13 +191,13 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
struct btrfs_delayed_node *node)
{
spin_lock(&root->lock);
- if (node->in_list) {
+ if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
root->nodes--;
atomic_dec(&node->refs); /* not in the list */
list_del_init(&node->n_list);
if (!list_empty(&node->p_list))
list_del_init(&node->p_list);
- node->in_list = 0;
+ clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
}
spin_unlock(&root->lock);
}
@@ -231,7 +230,8 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
delayed_root = node->root->fs_info->delayed_root;
spin_lock(&delayed_root->lock);
- if (!node->in_list) { /* not in the list */
+ if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
+ /* not in the list */
if (list_empty(&delayed_root->node_list))
goto out;
p = delayed_root->node_list.next;
@@ -1004,9 +1004,10 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
{
struct btrfs_delayed_root *delayed_root;
- if (delayed_node && delayed_node->inode_dirty) {
+ if (delayed_node &&
+ test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
BUG_ON(!delayed_node->root);
- delayed_node->inode_dirty = 0;
+ clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
delayed_node->count--;
delayed_root = delayed_node->root->fs_info->delayed_root;
@@ -1014,6 +1015,18 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
}
}
+/*
+ * Clear BTRFS_DELAYED_NODE_DEL_IREF on @delayed_node, drop its item count by
+ * one and report one finished item to the filesystem's delayed root.
+ */
+static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
+{
+	ASSERT(delayed_node->root);
+
+	clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
+	delayed_node->count--;
+
+	finish_one_item(delayed_node->root->fs_info->delayed_root);
+}
+
static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -1022,13 +1035,19 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
+ int mod;
int ret;
key.objectid = node->inode_id;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
- ret = btrfs_lookup_inode(trans, root, path, &key, 1);
+ if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
+ mod = -1;
+ else
+ mod = 1;
+
+ ret = btrfs_lookup_inode(trans, root, path, &key, mod);
if (ret > 0) {
btrfs_release_path(path);
return -ENOENT;
@@ -1036,19 +1055,58 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
return ret;
}
- btrfs_unlock_up_safe(path, 1);
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
sizeof(struct btrfs_inode_item));
btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
+ if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
+ goto no_iref;
+
+ path->slots[0]++;
+ if (path->slots[0] >= btrfs_header_nritems(leaf))
+ goto search;
+again:
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid != node->inode_id)
+ goto out;
+
+ if (key.type != BTRFS_INODE_REF_KEY &&
+ key.type != BTRFS_INODE_EXTREF_KEY)
+ goto out;
+
+ /*
+ * Delayed iref deletion is for the inode who has only one link,
+ * so there is only one iref. The case that several irefs are
+ * in the same item doesn't exist.
+ */
+ btrfs_del_item(trans, root, path);
+out:
+ btrfs_release_delayed_iref(node);
+no_iref:
+ btrfs_release_path(path);
+err_out:
btrfs_delayed_inode_release_metadata(root, node);
btrfs_release_delayed_inode(node);
- return 0;
+ return ret;
+
+search:
+ btrfs_release_path(path);
+
+ btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
+ key.offset = -1;
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0)
+ goto err_out;
+ ASSERT(ret);
+
+ ret = 0;
+ leaf = path->nodes[0];
+ path->slots[0]--;
+ goto again;
}
static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
@@ -1059,7 +1117,7 @@ static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
int ret;
mutex_lock(&node->mutex);
- if (!node->inode_dirty) {
+ if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
mutex_unlock(&node->mutex);
return 0;
}
@@ -1203,7 +1261,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
return 0;
mutex_lock(&delayed_node->mutex);
- if (!delayed_node->inode_dirty) {
+ if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
return 0;
@@ -1227,7 +1285,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
mutex_lock(&delayed_node->mutex);
- if (delayed_node->inode_dirty)
+ if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
path, delayed_node);
else
@@ -1300,36 +1358,9 @@ again:
trans->block_rsv = &root->fs_info->delayed_block_rsv;
__btrfs_commit_inode_delayed_items(trans, path, delayed_node);
- /*
- * Maybe new delayed items have been inserted, so we need requeue
- * the work. Besides that, we must dequeue the empty delayed nodes
- * to avoid the race between delayed items balance and the worker.
- * The race like this:
- * Task1 Worker thread
- * count == 0, needn't requeue
- * also needn't insert the
- * delayed node into prepare
- * list again.
- * add lots of delayed items
- * queue the delayed node
- * already in the list,
- * and not in the prepare
- * list, it means the delayed
- * node is being dealt with
- * by the worker.
- * do delayed items balance
- * the delayed node is being
- * dealt with by the worker
- * now, just wait.
- * the worker goto idle.
- * Task1 will sleep until the transaction is commited.
- */
- mutex_lock(&delayed_node->mutex);
- btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
- mutex_unlock(&delayed_node->mutex);
trans->block_rsv = block_rsv;
- btrfs_end_transaction_dmeta(trans, root);
+ btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty_nodelay(root);
release_path:
@@ -1376,52 +1407,41 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
WARN_ON(btrfs_first_delayed_node(delayed_root));
}
-static int refs_newer(struct btrfs_delayed_root *delayed_root,
- int seq, int count)
+static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
{
int val = atomic_read(&delayed_root->items_seq);
- if (val < seq || val >= seq + count)
+ if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
+ return 1;
+
+ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return 1;
+
return 0;
}
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
- int seq;
delayed_root = btrfs_get_delayed_root(root);
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return;
- seq = atomic_read(&delayed_root->items_seq);
-
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
+ int seq;
int ret;
- DEFINE_WAIT(__wait);
+
+ seq = atomic_read(&delayed_root->items_seq);
ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
if (ret)
return;
- while (1) {
- prepare_to_wait(&delayed_root->wait, &__wait,
- TASK_INTERRUPTIBLE);
-
- if (refs_newer(delayed_root, seq,
- BTRFS_DELAYED_BATCH) ||
- atomic_read(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND) {
- break;
- }
- if (!signal_pending(current))
- schedule();
- else
- break;
- }
- finish_wait(&delayed_root->wait, &__wait);
+ wait_event_interruptible(delayed_root->wait,
+ could_end_wait(delayed_root, seq));
+ return;
}
btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
@@ -1472,9 +1492,9 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex);
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (unlikely(ret)) {
- printk(KERN_ERR "err add delayed dir index item(name: %.*s) "
+ btrfs_err(root->fs_info, "err add delayed dir index item(name: %.*s) "
"into the insertion tree of the delayed node"
- "(root id: %llu, inode id: %llu, errno: %d)\n",
+ "(root id: %llu, inode id: %llu, errno: %d)",
name_len, name, delayed_node->root->objectid,
delayed_node->inode_id, ret);
BUG();
@@ -1544,9 +1564,9 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&node->mutex);
ret = __btrfs_add_delayed_deletion_item(node, item);
if (unlikely(ret)) {
- printk(KERN_ERR "err add delayed dir index item(index: %llu) "
+ btrfs_err(root->fs_info, "err add delayed dir index item(index: %llu) "
"into the deletion tree of the delayed node"
- "(root id: %llu, inode id: %llu, errno: %d)\n",
+ "(root id: %llu, inode id: %llu, errno: %d)",
index, node->root->objectid, node->inode_id,
ret);
BUG();
@@ -1759,7 +1779,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
return -ENOENT;
mutex_lock(&delayed_node->mutex);
- if (!delayed_node->inode_dirty) {
+ if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
return -ENOENT;
@@ -1810,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
return PTR_ERR(delayed_node);
mutex_lock(&delayed_node->mutex);
- if (delayed_node->inode_dirty) {
+ if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
goto release_node;
}
@@ -1821,7 +1841,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
goto release_node;
fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
- delayed_node->inode_dirty = 1;
+ set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
delayed_node->count++;
atomic_inc(&root->fs_info->delayed_root->items);
release_node:
@@ -1830,6 +1850,41 @@ release_node:
return ret;
}
+int btrfs_delayed_delete_inode_ref(struct inode *inode)
+{
+ struct btrfs_delayed_node *delayed_node;
+
+ delayed_node = btrfs_get_or_create_delayed_node(inode);
+ if (IS_ERR(delayed_node))
+ return PTR_ERR(delayed_node);
+
+ /*
+ * Flag the delayed node with BTRFS_DELAYED_NODE_DEL_IREF so that the
+ * inode ref is deleted when the delayed inode item is written back.
+ * Returns 0, or the error from looking up/creating the delayed node.
+ *
+ * We don't reserve space for the inode ref deletion because:
+ * - We ONLY do async inode ref deletion for an inode that has only
+ * one link (i_nlink == 1), which means there is only one inode ref.
+ * In most cases the inode ref and the inode item are in the
+ * same leaf, and we will deal with them at the same time.
+ * Since we are sure we will reserve the space for the inode item,
+ * it is unnecessary to reserve space for the inode ref deletion.
+ * - If the inode ref and the inode item are not in the same leaf,
+ * we also needn't worry about an ENOSPC problem, because we reserve
+ * much more space for the inode update than it needs.
+ * - At worst, we can steal some space from the global reservation.
+ * That is very rare.
+ */
+ mutex_lock(&delayed_node->mutex);
+ if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
+ goto release_node; /* already flagged: don't count the deletion twice */
+
+ set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
+ delayed_node->count++;
+ atomic_inc(&BTRFS_I(inode)->root->fs_info->delayed_root->items);
+release_node:
+ mutex_unlock(&delayed_node->mutex);
+ btrfs_release_delayed_node(delayed_node);
+ return 0;
+}
+
static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
{
struct btrfs_root *root = delayed_node->root;
@@ -1852,7 +1907,10 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
btrfs_release_delayed_item(prev_item);
}
- if (delayed_node->inode_dirty) {
+ if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
+ btrfs_release_delayed_iref(delayed_node);
+
+ if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
btrfs_delayed_inode_release_metadata(root, delayed_node);
btrfs_release_delayed_inode(delayed_node);
}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index a4b38f934d14..f70119f25421 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -48,6 +48,10 @@ struct btrfs_delayed_root {
wait_queue_head_t wait;
};
+#define BTRFS_DELAYED_NODE_IN_LIST 0 /* bit numbers for btrfs_delayed_node.flags; NOTE(review): presumably "node is queued on a delayed root list" - users not visible here, confirm */
+#define BTRFS_DELAYED_NODE_INODE_DIRTY 1 /* inode_item holds an update that still has to be written to the tree */
+#define BTRFS_DELAYED_NODE_DEL_IREF 2 /* the inode ref is to be deleted along with the delayed inode update */
+
struct btrfs_delayed_node {
u64 inode_id;
u64 bytes_reserved;
@@ -65,8 +69,7 @@ struct btrfs_delayed_node {
struct btrfs_inode_item inode_item;
atomic_t refs;
u64 index_cnt;
- bool in_list;
- bool inode_dirty;
+ unsigned long flags;
int count;
};
@@ -125,6 +128,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
int btrfs_fill_inode(struct inode *inode, u32 *rdev);
+int btrfs_delayed_delete_inode_ref(struct inode *inode);
/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e4d467be2dd4..f3bff89eecf0 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -161,35 +161,61 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
return NULL;
}
+/*
+ * Insert a new head ref into the head ref rbtree, which is ordered by
+ * node.bytenr.
+ *
+ * @root: rbtree of btrfs_delayed_ref_head entries linked via href_node
+ * @node: the href_node embedded in the head ref to insert
+ *
+ * Returns NULL once @node has been linked into @root. If an entry with
+ * the same bytenr is already in the tree, that entry is returned instead
+ * and @node is not inserted.
+ */
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+ struct rb_node *node)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent_node = NULL;
+ struct btrfs_delayed_ref_head *entry;
+ struct btrfs_delayed_ref_head *ins;
+ u64 bytenr;
+
+ ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
+ bytenr = ins->node.bytenr;
+ while (*p) { /* walk down until an empty child slot is reached */
+ parent_node = *p;
+ entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
+ href_node);
+
+ if (bytenr < entry->node.bytenr)
+ p = &(*p)->rb_left;
+ else if (bytenr > entry->node.bytenr)
+ p = &(*p)->rb_right;
+ else
+ return entry; /* same bytenr already present: nothing is inserted */
+ }
+
+ rb_link_node(node, parent_node, p); /* p is the empty slot under parent_node */
+ rb_insert_color(node, root);
+ return NULL;
+}
+
/*
* find an head entry based on bytenr. This returns the delayed ref
* head if it was able to find one, or NULL if nothing was in that spot.
* If return_bigger is given, the next bigger entry is returned if no exact
* match is found.
*/
-static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
- u64 bytenr,
- struct btrfs_delayed_ref_node **last,
- int return_bigger)
+static struct btrfs_delayed_ref_head *
+find_ref_head(struct rb_root *root, u64 bytenr,
+ struct btrfs_delayed_ref_head **last, int return_bigger)
{
struct rb_node *n;
- struct btrfs_delayed_ref_node *entry;
+ struct btrfs_delayed_ref_head *entry;
int cmp = 0;
again:
n = root->rb_node;
entry = NULL;
while (n) {
- entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
- WARN_ON(!entry->in_tree);
+ entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
if (last)
*last = entry;
- if (bytenr < entry->bytenr)
+ if (bytenr < entry->node.bytenr)
cmp = -1;
- else if (bytenr > entry->bytenr)
- cmp = 1;
- else if (!btrfs_delayed_ref_is_head(entry))
+ else if (bytenr > entry->node.bytenr)
cmp = 1;
else
cmp = 0;
@@ -203,12 +229,12 @@ again:
}
if (entry && return_bigger) {
if (cmp > 0) {
- n = rb_next(&entry->rb_node);
+ n = rb_next(&entry->href_node);
if (!n)
n = rb_first(root);
- entry = rb_entry(n, struct btrfs_delayed_ref_node,
- rb_node);
- bytenr = entry->bytenr;
+ entry = rb_entry(n, struct btrfs_delayed_ref_head,
+ href_node);
+ bytenr = entry->node.bytenr;
return_bigger = 0;
goto again;
}
@@ -243,33 +269,38 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref)
{
- rb_erase(&ref->rb_node, &delayed_refs->root);
+ if (btrfs_delayed_ref_is_head(ref)) {
+ head = btrfs_delayed_node_to_head(ref);
+ rb_erase(&head->href_node, &delayed_refs->href_root);
+ } else {
+ assert_spin_locked(&head->lock);
+ rb_erase(&ref->rb_node, &head->ref_root);
+ }
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
- delayed_refs->num_entries--;
+ atomic_dec(&delayed_refs->num_entries);
if (trans->delayed_ref_updates)
trans->delayed_ref_updates--;
}
static int merge_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref, u64 seq)
{
struct rb_node *node;
- int merged = 0;
int mod = 0;
int done = 0;
- node = rb_prev(&ref->rb_node);
- while (node) {
+ node = rb_next(&ref->rb_node);
+ while (!done && node) {
struct btrfs_delayed_ref_node *next;
next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- node = rb_prev(node);
- if (next->bytenr != ref->bytenr)
- break;
+ node = rb_next(node);
if (seq && next->seq >= seq)
break;
if (comp_entry(ref, next, 0))
@@ -289,12 +320,11 @@ static int merge_ref(struct btrfs_trans_handle *trans,
mod = -next->ref_mod;
}
- merged++;
- drop_delayed_ref(trans, delayed_refs, next);
+ drop_delayed_ref(trans, delayed_refs, head, next);
ref->ref_mod += mod;
if (ref->ref_mod == 0) {
- drop_delayed_ref(trans, delayed_refs, ref);
- break;
+ drop_delayed_ref(trans, delayed_refs, head, ref);
+ done = 1;
} else {
/*
* You can't have multiples of the same ref on a tree
@@ -303,13 +333,8 @@ static int merge_ref(struct btrfs_trans_handle *trans,
WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
}
-
- if (done)
- break;
- node = rb_prev(&ref->rb_node);
}
-
- return merged;
+ return done;
}
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
@@ -320,6 +345,14 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct rb_node *node;
u64 seq = 0;
+ assert_spin_locked(&head->lock);
+ /*
+ * We don't have too many refs to merge in the case of delayed data
+ * refs.
+ */
+ if (head->is_data)
+ return;
+
spin_lock(&fs_info->tree_mod_seq_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
struct seq_list *elem;
@@ -330,22 +363,19 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
}
spin_unlock(&fs_info->tree_mod_seq_lock);
- node = rb_prev(&head->node.rb_node);
+ node = rb_first(&head->ref_root);
while (node) {
struct btrfs_delayed_ref_node *ref;
ref = rb_entry(node, struct btrfs_delayed_ref_node,
rb_node);
- if (ref->bytenr != head->node.bytenr)
- break;
-
/* We can't merge refs that are outside of our seq count */
if (seq && ref->seq >= seq)
break;
- if (merge_ref(trans, delayed_refs, ref, seq))
- node = rb_prev(&head->node.rb_node);
+ if (merge_ref(trans, delayed_refs, head, ref, seq))
+ node = rb_first(&head->ref_root);
else
- node = rb_prev(node);
+ node = rb_next(&ref->rb_node);
}
}
@@ -373,71 +403,52 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
return ret;
}
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
- struct list_head *cluster, u64 start)
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans)
{
- int count = 0;
struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
- struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *head;
+ u64 start;
+ bool loop = false;
delayed_refs = &trans->transaction->delayed_refs;
- if (start == 0) {
- node = rb_first(&delayed_refs->root);
- } else {
- ref = NULL;
- find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
- if (ref) {
- node = &ref->rb_node;
- } else
- node = rb_first(&delayed_refs->root);
- }
+
again:
- while (node && count < 32) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (btrfs_delayed_ref_is_head(ref)) {
- head = btrfs_delayed_node_to_head(ref);
- if (list_empty(&head->cluster)) {
- list_add_tail(&head->cluster, cluster);
- delayed_refs->run_delayed_start =
- head->node.bytenr;
- count++;
-
- WARN_ON(delayed_refs->num_heads_ready == 0);
- delayed_refs->num_heads_ready--;
- } else if (count) {
- /* the goal of the clustering is to find extents
- * that are likely to end up in the same extent
- * leaf on disk. So, we don't want them spread
- * all over the tree. Stop now if we've hit
- * a head that was already in use
- */
- break;
- }
- }
- node = rb_next(node);
- }
- if (count) {
- return 0;
- } else if (start) {
- /*
- * we've gone to the end of the rbtree without finding any
- * clusters. start from the beginning and try again
- */
+ start = delayed_refs->run_delayed_start;
+ head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
+ if (!head && !loop) {
+ delayed_refs->run_delayed_start = 0;
start = 0;
- node = rb_first(&delayed_refs->root);
- goto again;
+ loop = true;
+ head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
+ if (!head)
+ return NULL;
+ } else if (!head && loop) {
+ return NULL;
}
- return 1;
-}
-void btrfs_release_ref_cluster(struct list_head *cluster)
-{
- struct list_head *pos, *q;
+ while (head->processing) {
+ struct rb_node *node;
+
+ node = rb_next(&head->href_node);
+ if (!node) {
+ if (loop)
+ return NULL;
+ delayed_refs->run_delayed_start = 0;
+ start = 0;
+ loop = true;
+ goto again;
+ }
+ head = rb_entry(node, struct btrfs_delayed_ref_head,
+ href_node);
+ }
- list_for_each_safe(pos, q, cluster)
- list_del_init(pos);
+ head->processing = 1;
+ WARN_ON(delayed_refs->num_heads_ready == 0);
+ delayed_refs->num_heads_ready--;
+ delayed_refs->run_delayed_start = head->node.bytenr +
+ head->node.num_bytes;
+ return head;
}
/*
@@ -451,6 +462,7 @@ void btrfs_release_ref_cluster(struct list_head *cluster)
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
+ struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *existing,
struct btrfs_delayed_ref_node *update)
{
@@ -463,7 +475,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
*/
existing->ref_mod--;
if (existing->ref_mod == 0)
- drop_delayed_ref(trans, delayed_refs, existing);
+ drop_delayed_ref(trans, delayed_refs, head, existing);
else
WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -533,9 +545,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
}
}
/*
- * update the reference mod on the head to reflect this new operation
+ * update the reference mod on the head to reflect this new operation,
+ * only need the lock for this case cause we could be processing it
+ * currently, for refs we just added we know we're a-ok.
*/
+ spin_lock(&existing_ref->lock);
existing->ref_mod += update->ref_mod;
+ spin_unlock(&existing_ref->lock);
}
/*
@@ -543,13 +559,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
* this does all the dirty work in terms of maintaining the correct
* overall modification count.
*/
-static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes,
- int action, int is_data)
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_node *ref, u64 bytenr,
+ u64 num_bytes, int action, int is_data)
{
- struct btrfs_delayed_ref_node *existing;
+ struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
@@ -596,38 +612,43 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
head_ref = btrfs_delayed_node_to_head(ref);
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
+ head_ref->ref_root = RB_ROOT;
+ head_ref->processing = 0;
- INIT_LIST_HEAD(&head_ref->cluster);
+ spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
trace_add_delayed_ref_head(ref, head_ref, action);
- existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+ existing = htree_insert(&delayed_refs->href_root,
+ &head_ref->href_node);
if (existing) {
- update_existing_head_ref(existing, ref);
+ update_existing_head_ref(&existing->node, ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
*/
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+ head_ref = existing;
} else {
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
- delayed_refs->num_entries++;
+ atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
+ return head_ref;
}
/*
* helper to insert a delayed tree ref into the rbtree.
*/
-static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action,
- int for_cow)
+static noinline void
+add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_delayed_ref_node *ref, u64 bytenr,
+ u64 num_bytes, u64 parent, u64 ref_root, int level,
+ int action, int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
@@ -663,30 +684,33 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
trace_add_delayed_tree_ref(ref, full_ref, action);
- existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+ spin_lock(&head_ref->lock);
+ existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
if (existing) {
- update_existing_ref(trans, delayed_refs, existing, ref);
+ update_existing_ref(trans, delayed_refs, head_ref, existing,
+ ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
*/
kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
} else {
- delayed_refs->num_entries++;
+ atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
+ spin_unlock(&head_ref->lock);
}
/*
* helper to insert a delayed data ref into the rbtree.
*/
-static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, u64 owner, u64 offset,
- int action, int for_cow)
+static noinline void
+add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_delayed_ref_node *ref, u64 bytenr,
+ u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
+ u64 offset, int action, int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
@@ -724,19 +748,21 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
trace_add_delayed_data_ref(ref, full_ref, action);
- existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+ spin_lock(&head_ref->lock);
+ existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
if (existing) {
- update_existing_ref(trans, delayed_refs, existing, ref);
+ update_existing_ref(trans, delayed_refs, head_ref, existing,
+ ref);
/*
* we've updated the existing ref, free the newly
* allocated ref
*/
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
} else {
- delayed_refs->num_entries++;
+ atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
+ spin_unlock(&head_ref->lock);
}
/*
@@ -775,10 +801,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
- num_bytes, action, 0);
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ bytenr, num_bytes, action, 0);
- add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
+ add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
spin_unlock(&delayed_refs->lock);
@@ -823,10 +849,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
- num_bytes, action, 1);
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ bytenr, num_bytes, action, 1);
- add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
+ add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
spin_unlock(&delayed_refs->lock);
@@ -869,14 +895,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
- struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_root *delayed_refs;
delayed_refs = &trans->transaction->delayed_refs;
- ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
- if (ref)
- return btrfs_delayed_node_to_head(ref);
- return NULL;
+ return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0);
}
void btrfs_delayed_ref_exit(void)
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 70b962cc177d..4ba9b93022ff 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -81,7 +81,10 @@ struct btrfs_delayed_ref_head {
*/
struct mutex mutex;
- struct list_head cluster;
+ spinlock_t lock;
+ struct rb_root ref_root;
+
+ struct rb_node href_node;
struct btrfs_delayed_extent_op *extent_op;
/*
@@ -98,6 +101,7 @@ struct btrfs_delayed_ref_head {
*/
unsigned int must_insert_reserved:1;
unsigned int is_data:1;
+ unsigned int processing:1;
};
struct btrfs_delayed_tree_ref {
@@ -116,7 +120,8 @@ struct btrfs_delayed_data_ref {
};
struct btrfs_delayed_ref_root {
- struct rb_root root;
+ /* head ref rbtree */
+ struct rb_root href_root;
/* this spin lock protects the rbtree and the entries inside */
spinlock_t lock;
@@ -124,7 +129,7 @@ struct btrfs_delayed_ref_root {
/* how many delayed ref updates we've queued, used by the
* throttling code
*/
- unsigned long num_entries;
+ atomic_t num_entries;
/* total number of head nodes in tree */
unsigned long num_heads;
@@ -133,15 +138,6 @@ struct btrfs_delayed_ref_root {
unsigned long num_heads_ready;
/*
- * bumped when someone is making progress on the delayed
- * refs, so that other procs know they are just adding to
- * contention intead of helping
- */
- atomic_t procs_running_refs;
- atomic_t ref_seq;
- wait_queue_head_t wait;
-
- /*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
* to be run right away
@@ -226,9 +222,9 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
mutex_unlock(&head->mutex);
}
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
- struct list_head *cluster, u64 search_start);
-void btrfs_release_ref_cluster(struct list_head *cluster);
+
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans);
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2cfc3dfff64f..564c92638b20 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -102,7 +102,8 @@ no_valid_dev_replace_entry_found:
ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item);
if (item_size != sizeof(struct btrfs_dev_replace_item)) {
- pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n");
+ btrfs_warn(fs_info,
+ "dev_replace entry found has unexpected size, ignore entry");
goto no_valid_dev_replace_entry_found;
}
@@ -145,13 +146,19 @@ no_valid_dev_replace_entry_found:
if (!dev_replace->srcdev &&
!btrfs_test_opt(dev_root, DEGRADED)) {
ret = -EIO;
- pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
- src_devid);
+ btrfs_warn(fs_info,
+ "cannot mount because device replace operation is ongoing and");
+ btrfs_warn(fs_info,
+ "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
+ src_devid);
}
if (!dev_replace->tgtdev &&
!btrfs_test_opt(dev_root, DEGRADED)) {
ret = -EIO;
- pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
+ btrfs_warn(fs_info,
+ "cannot mount because device replace operation is ongoing and");
+ btrfs_warn(fs_info,
+ "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
BTRFS_DEV_REPLACE_DEVID);
}
if (dev_replace->tgtdev) {
@@ -210,7 +217,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
}
ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
if (ret < 0) {
- pr_warn("btrfs: error %d while searching for dev_replace item!\n",
+ btrfs_warn(fs_info, "error %d while searching for dev_replace item!",
ret);
goto out;
}
@@ -230,7 +237,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
*/
ret = btrfs_del_item(trans, dev_root, path);
if (ret != 0) {
- pr_warn("btrfs: delete too small dev_replace item failed %d!\n",
+ btrfs_warn(fs_info, "delete too small dev_replace item failed %d!",
ret);
goto out;
}
@@ -243,7 +250,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, dev_root, path,
&key, sizeof(*ptr));
if (ret < 0) {
- pr_warn("btrfs: insert dev_replace item failed %d!\n",
+ btrfs_warn(fs_info, "insert dev_replace item failed %d!",
ret);
goto out;
}
@@ -305,7 +312,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_device *src_device = NULL;
if (btrfs_fs_incompat(fs_info, RAID56)) {
- pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
+ btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
return -EINVAL;
}
@@ -325,7 +332,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
&tgt_device);
if (ret) {
- pr_err("btrfs: target device %s is invalid!\n",
+ btrfs_err(fs_info, "target device %s is invalid!",
args->start.tgtdev_name);
mutex_unlock(&fs_info->volume_mutex);
return -EINVAL;
@@ -341,7 +348,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
}
if (tgt_device->total_bytes < src_device->total_bytes) {
- pr_err("btrfs: target device is smaller than source device!\n");
+ btrfs_err(fs_info, "target device is smaller than source device!");
ret = -EINVAL;
goto leave_no_lock;
}
@@ -366,7 +373,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->tgtdev = tgt_device;
printk_in_rcu(KERN_INFO
- "btrfs: dev_replace from %s (devid %llu) to %s started\n",
+ "BTRFS: dev_replace from %s (devid %llu) to %s started\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
@@ -489,7 +496,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
if (scrub_ret) {
printk_in_rcu(KERN_ERR
- "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
+ "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
@@ -504,7 +511,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
}
printk_in_rcu(KERN_INFO
- "btrfs: dev_replace from %s (devid %llu) to %s) finished\n",
+ "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n",
src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name),
src_device->devid,
@@ -699,7 +706,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
- pr_info("btrfs: suspending dev_replace for unmount\n");
+ btrfs_info(fs_info, "suspending dev_replace for unmount");
break;
}
@@ -728,8 +735,9 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
break;
}
if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
- pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n"
- "btrfs: you may cancel the operation after 'mount -o degraded'\n");
+ btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
+ btrfs_info(fs_info,
+ "you may cancel the operation after 'mount -o degraded'");
btrfs_dev_replace_unlock(dev_replace);
return 0;
}
@@ -755,14 +763,14 @@ static int btrfs_dev_replace_kthread(void *data)
kfree(status_args);
do_div(progress, 10);
printk_in_rcu(KERN_INFO
- "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
- dev_replace->srcdev->missing ? "<missing disk>" :
- rcu_str_deref(dev_replace->srcdev->name),
- dev_replace->srcdev->devid,
- dev_replace->tgtdev ?
- rcu_str_deref(dev_replace->tgtdev->name) :
- "<missing target disk>",
- (unsigned int)progress);
+ "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
+ dev_replace->srcdev->missing ? "<missing disk>" :
+ rcu_str_deref(dev_replace->srcdev->name),
+ dev_replace->srcdev->devid,
+ dev_replace->tgtdev ?
+ rcu_str_deref(dev_replace->tgtdev->name) :
+ "<missing target disk>",
+ (unsigned int)progress);
}
btrfs_dev_replace_continue_on_mount(fs_info);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c031ea3fd70f..a0691df5dcea 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -261,7 +261,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
* see if there is room in the item to insert this
* name
*/
- data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item);
+ data_size = sizeof(*di) + name_len;
leaf = path->nodes[0];
slot = path->slots[0];
if (data_size + btrfs_item_size_nr(leaf, slot) +
@@ -459,7 +459,7 @@ int verify_dir_item(struct btrfs_root *root,
u8 type = btrfs_dir_type(leaf, dir_item);
if (type >= BTRFS_FT_MAX) {
- printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
+ btrfs_crit(root->fs_info, "invalid dir item type: %d",
(int)type);
return 1;
}
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_root *root,
namelen = XATTR_NAME_MAX;
if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
- printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n",
+ btrfs_crit(root->fs_info, "invalid dir item name len: %u",
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
@@ -476,7 +476,7 @@ int verify_dir_item(struct btrfs_root *root,
/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
if ((btrfs_dir_data_len(leaf, dir_item) +
btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
- printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n",
+ btrfs_crit(root->fs_info, "invalid dir item name + data len: %u + %u",
(unsigned)btrfs_dir_name_len(leaf, dir_item),
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8072cfa8a3b1..81ea55314b1f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,7 +26,6 @@
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
-#include <linux/crc32c.h>
#include <linux/slab.h>
#include <linux/migrate.h>
#include <linux/ratelimit.h>
@@ -35,6 +34,7 @@
#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
+#include "hash.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
@@ -48,6 +48,7 @@
#include "rcu-string.h"
#include "dev-replace.h"
#include "raid56.h"
+#include "sysfs.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -243,7 +244,7 @@ out:
u32 btrfs_csum_data(char *data, u32 seed, size_t len)
{
- return crc32c(seed, data, len);
+ return btrfs_crc32c(seed, data, len);
}
void btrfs_csum_final(u32 crc, char *result)
@@ -299,11 +300,11 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
memcpy(&found, result, csum_size);
read_extent_buffer(buf, &val, 0, csum_size);
- printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
- "failed on %llu wanted %X found %X "
- "level %d\n",
- root->fs_info->sb->s_id, buf->start,
- val, found, btrfs_header_level(buf));
+ printk_ratelimited(KERN_INFO
+ "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
+ "level %d\n",
+ root->fs_info->sb->s_id, buf->start,
+ val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
return 1;
@@ -382,13 +383,14 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
ret = 1;
if (ret && btrfs_super_generation(disk_sb) < 10) {
- printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+ printk(KERN_WARNING
+ "BTRFS: super block crcs don't match, older mkfs detected\n");
ret = 0;
}
}
if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
- printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+ printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n",
csum_type);
ret = 1;
}
@@ -464,13 +466,10 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
- struct extent_io_tree *tree;
u64 start = page_offset(page);
u64 found_start;
struct extent_buffer *eb;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
eb = (struct extent_buffer *)page->private;
if (page != eb->pages[0])
return 0;
@@ -500,8 +499,8 @@ static int check_tree_block_fsid(struct btrfs_root *root,
}
#define CORRUPT(reason, eb, root, slot) \
- printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
- "root=%llu, slot=%d\n", reason, \
+ btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \
+ "root=%llu, slot=%d", reason, \
btrfs_header_bytenr(eb), root->objectid, slot)
static noinline int check_leaf(struct btrfs_root *root,
@@ -569,7 +568,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
{
- struct extent_io_tree *tree;
u64 found_start;
int found_level;
struct extent_buffer *eb;
@@ -580,7 +578,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
if (!page->private)
goto out;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
eb = (struct extent_buffer *)page->private;
/* the pending IO might have been the only thing that kept this buffer
@@ -600,21 +597,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- printk_ratelimited(KERN_INFO "btrfs bad tree block start "
+ printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
"%llu %llu\n",
found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root, eb)) {
- printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
+ printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
eb->start);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
- btrfs_info(root->fs_info, "bad tree block level %d\n",
+ btrfs_info(root->fs_info, "bad tree block level %d",
(int)btrfs_header_level(eb));
ret = -EIO;
goto err;
@@ -842,20 +839,17 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
static int btree_csum_one_bio(struct bio *bio)
{
- struct bio_vec *bvec = bio->bi_io_vec;
- int bio_index = 0;
+ struct bio_vec *bvec;
struct btrfs_root *root;
- int ret = 0;
+ int i, ret = 0;
- WARN_ON(bio->bi_vcnt <= 0);
- while (bio_index < bio->bi_vcnt) {
+ bio_for_each_segment_all(bvec, bio, i) {
root = BTRFS_I(bvec->bv_page->mapping->host)->root;
ret = csum_dirty_buffer(root, bvec->bv_page);
if (ret)
break;
- bio_index++;
- bvec++;
}
+
return ret;
}
@@ -967,11 +961,9 @@ static int btree_migratepage(struct address_space *mapping,
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct extent_io_tree *tree;
struct btrfs_fs_info *fs_info;
int ret;
- tree = &BTRFS_I(mapping->host)->io_tree;
if (wbc->sync_mode == WB_SYNC_NONE) {
if (wbc->for_kupdate)
@@ -1010,8 +1002,9 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
extent_invalidatepage(tree, page, offset);
btree_releasepage(page, GFP_NOFS);
if (PagePrivate(page)) {
- printk(KERN_WARNING "btrfs warning page private not zero "
- "on page %llu\n", (unsigned long long)page_offset(page));
+ btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
+ "page private not zero on page %llu",
+ (unsigned long long)page_offset(page));
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
@@ -1095,21 +1088,13 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
{
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_buffer *eb;
- eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
- return eb;
+ return find_extent_buffer(root->fs_info, bytenr);
}
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
{
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_buffer *eb;
-
- eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
- bytenr, blocksize);
- return eb;
+ return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
}
@@ -1273,7 +1258,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root;
struct btrfs_key key;
int ret = 0;
- u64 bytenr;
uuid_le uuid;
root = btrfs_alloc_root(fs_info);
@@ -1295,7 +1279,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
goto fail;
}
- bytenr = leaf->start;
memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(leaf, leaf->start);
btrfs_set_header_generation(leaf, trans->transid);
@@ -1616,7 +1599,8 @@ again:
if (ret)
goto fail;
- ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+ ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
+ location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
if (ret < 0)
goto fail;
if (ret == 0)
@@ -1684,18 +1668,16 @@ static void end_workqueue_fn(struct btrfs_work *work)
{
struct bio *bio;
struct end_io_wq *end_io_wq;
- struct btrfs_fs_info *fs_info;
int error;
end_io_wq = container_of(work, struct end_io_wq, work);
bio = end_io_wq->bio;
- fs_info = end_io_wq->info;
error = end_io_wq->error;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
kfree(end_io_wq);
- bio_endio(bio, error);
+ bio_endio_nodec(bio, error);
}
static int cleaner_kthread(void *arg)
@@ -2080,6 +2062,12 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
for (i = 0; i < ret; i++)
btrfs_drop_and_free_fs_root(fs_info, gang[i]);
}
+
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+ btrfs_free_log_root_tree(NULL, fs_info);
+ btrfs_destroy_pinned_extent(fs_info->tree_root,
+ fs_info->pinned_extents);
+ }
}
int open_ctree(struct super_block *sb,
@@ -2154,6 +2142,7 @@ int open_ctree(struct super_block *sb,
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+ INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2167,6 +2156,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
+ spin_lock_init(&fs_info->buffer_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
seqlock_init(&fs_info->profiles_lock);
@@ -2198,7 +2188,7 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
-
+ fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
@@ -2337,7 +2327,7 @@ int open_ctree(struct super_block *sb,
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
*/
if (btrfs_check_super_csum(bh->b_data)) {
- printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+ printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
err = -EINVAL;
goto fail_alloc;
}
@@ -2356,7 +2346,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
if (ret) {
- printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+ printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
err = -EINVAL;
goto fail_alloc;
}
@@ -2421,7 +2411,7 @@ int open_ctree(struct super_block *sb,
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
- printk(KERN_ERR "btrfs: has skinny extents\n");
+ printk(KERN_ERR "BTRFS: has skinny extents\n");
/*
* flag our filesystem as having big metadata blocks if
@@ -2429,7 +2419,7 @@ int open_ctree(struct super_block *sb,
*/
if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
- printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
+ printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
}
@@ -2446,7 +2436,7 @@ int open_ctree(struct super_block *sb,
*/
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(sectorsize != leafsize)) {
- printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
+ printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
"are not allowed for mixed block groups on %s\n",
sb->s_id);
goto fail_alloc;
@@ -2583,12 +2573,12 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize_bits = blksize_bits(sectorsize);
if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
+ printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
goto fail_sb_buffer;
}
if (sectorsize != PAGE_SIZE) {
- printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+ printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
"found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer;
}
@@ -2597,7 +2587,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
- printk(KERN_WARNING "btrfs: failed to read the system "
+ printk(KERN_WARNING "BTRFS: failed to read the system "
"array on %s\n", sb->s_id);
goto fail_sb_buffer;
}
@@ -2614,7 +2604,7 @@ int open_ctree(struct super_block *sb,
blocksize, generation);
if (!chunk_root->node ||
!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
- printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+ printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@@ -2626,7 +2616,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_chunk_tree(chunk_root);
if (ret) {
- printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
+ printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@@ -2638,7 +2628,7 @@ int open_ctree(struct super_block *sb,
btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) {
- printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+ printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
sb->s_id);
goto fail_tree_roots;
}
@@ -2653,7 +2643,7 @@ retry_root_backup:
blocksize, generation);
if (!tree_root->node ||
!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
- printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+ printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
sb->s_id);
goto recovery_tree_root;
@@ -2724,50 +2714,56 @@ retry_root_backup:
ret = btrfs_recover_balance(fs_info);
if (ret) {
- printk(KERN_WARNING "btrfs: failed to recover balance\n");
+ printk(KERN_WARNING "BTRFS: failed to recover balance\n");
goto fail_block_groups;
}
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
- printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
+ printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
ret);
goto fail_block_groups;
}
ret = btrfs_init_dev_replace(fs_info);
if (ret) {
- pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+ pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
goto fail_block_groups;
}
btrfs_close_extra_devices(fs_info, fs_devices, 1);
- ret = btrfs_init_space_info(fs_info);
+ ret = btrfs_sysfs_add_one(fs_info);
if (ret) {
- printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+ pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
goto fail_block_groups;
}
+ ret = btrfs_init_space_info(fs_info);
+ if (ret) {
+ printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
+ goto fail_sysfs;
+ }
+
ret = btrfs_read_block_groups(extent_root);
if (ret) {
- printk(KERN_ERR "Failed to read block groups: %d\n", ret);
- goto fail_block_groups;
+ printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
+ goto fail_sysfs;
}
fs_info->num_tolerated_disk_barrier_failures =
btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
if (fs_info->fs_devices->missing_devices >
fs_info->num_tolerated_disk_barrier_failures &&
!(sb->s_flags & MS_RDONLY)) {
- printk(KERN_WARNING
- "Btrfs: too many missing devices, writeable mount is not allowed\n");
- goto fail_block_groups;
+ printk(KERN_WARNING "BTRFS: "
+ "too many missing devices, writeable mount is not allowed\n");
+ goto fail_sysfs;
}
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
- goto fail_block_groups;
+ goto fail_sysfs;
fs_info->transaction_kthread = kthread_run(transaction_kthread,
tree_root,
@@ -2778,11 +2774,15 @@ retry_root_backup:
if (!btrfs_test_opt(tree_root, SSD) &&
!btrfs_test_opt(tree_root, NOSSD) &&
!fs_info->fs_devices->rotating) {
- printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
+ printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
"mode\n");
btrfs_set_opt(fs_info->mount_opt, SSD);
}
+ /* Set the real inode map cache flag */
+ if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE))
+ btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE);
+
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
ret = btrfsic_mount(tree_root, fs_devices,
@@ -2791,7 +2791,7 @@ retry_root_backup:
1 : 0,
fs_info->check_integrity_print_mask);
if (ret)
- printk(KERN_WARNING "btrfs: failed to initialize"
+ printk(KERN_WARNING "BTRFS: failed to initialize"
" integrity check module %s\n", sb->s_id);
}
#endif
@@ -2804,7 +2804,7 @@ retry_root_backup:
u64 bytenr = btrfs_super_log_root(disk_super);
if (fs_devices->rw_devices == 0) {
- printk(KERN_WARNING "Btrfs log replay required "
+ printk(KERN_WARNING "BTRFS: log replay required "
"on RO media\n");
err = -EIO;
goto fail_qgroup;
@@ -2827,7 +2827,7 @@ retry_root_backup:
generation + 1);
if (!log_tree_root->node ||
!extent_buffer_uptodate(log_tree_root->node)) {
- printk(KERN_ERR "btrfs: failed to read log tree\n");
+ printk(KERN_ERR "BTRFS: failed to read log tree\n");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
goto fail_trans_kthread;
@@ -2861,7 +2861,7 @@ retry_root_backup:
ret = btrfs_recover_relocation(tree_root);
if (ret < 0) {
printk(KERN_WARNING
- "btrfs: failed to recover relocation\n");
+ "BTRFS: failed to recover relocation\n");
err = -EINVAL;
goto fail_qgroup;
}
@@ -2891,14 +2891,14 @@ retry_root_backup:
ret = btrfs_resume_balance_async(fs_info);
if (ret) {
- printk(KERN_WARNING "btrfs: failed to resume balance\n");
+ printk(KERN_WARNING "BTRFS: failed to resume balance\n");
close_ctree(tree_root);
return ret;
}
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
- pr_warn("btrfs: failed to resume dev_replace\n");
+ pr_warn("BTRFS: failed to resume dev_replace\n");
close_ctree(tree_root);
return ret;
}
@@ -2906,20 +2906,20 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
if (create_uuid_tree) {
- pr_info("btrfs: creating UUID tree\n");
+ pr_info("BTRFS: creating UUID tree\n");
ret = btrfs_create_uuid_tree(fs_info);
if (ret) {
- pr_warn("btrfs: failed to create the UUID tree %d\n",
+ pr_warn("BTRFS: failed to create the UUID tree %d\n",
ret);
close_ctree(tree_root);
return ret;
}
} else if (check_uuid_tree ||
btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
- pr_info("btrfs: checking UUID tree\n");
+ pr_info("BTRFS: checking UUID tree\n");
ret = btrfs_check_uuid_tree(fs_info);
if (ret) {
- pr_warn("btrfs: failed to check the UUID tree %d\n",
+ pr_warn("BTRFS: failed to check the UUID tree %d\n",
ret);
close_ctree(tree_root);
return ret;
@@ -2945,6 +2945,9 @@ fail_cleaner:
*/
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
+fail_sysfs:
+ btrfs_sysfs_remove_one(fs_info);
+
fail_block_groups:
btrfs_put_block_group_cache(fs_info);
btrfs_free_block_groups(fs_info);
@@ -3000,7 +3003,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
- printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
+ printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
"I/O error on %s\n",
rcu_str_deref(device->name));
/* note, we dont' set_buffer_write_io_error because we have
@@ -3119,7 +3122,7 @@ static int write_dev_supers(struct btrfs_device *device,
bh = __getblk(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
- printk(KERN_ERR "btrfs: couldn't get super "
+ printk(KERN_ERR "BTRFS: couldn't get super "
"buffer head for bytenr %Lu\n", bytenr);
errors++;
continue;
@@ -3140,7 +3143,10 @@ static int write_dev_supers(struct btrfs_device *device,
* we fua the first super. The others we allow
* to go down lazy.
*/
- ret = btrfsic_submit_bh(WRITE_FUA, bh);
+ if (i == 0)
+ ret = btrfsic_submit_bh(WRITE_FUA, bh);
+ else
+ ret = btrfsic_submit_bh(WRITE_SYNC, bh);
if (ret)
errors++;
}
@@ -3186,7 +3192,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
wait_for_completion(&device->flush_wait);
if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
- printk_in_rcu("btrfs: disabling barriers on dev %s\n",
+ printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
rcu_str_deref(device->name));
device->nobarriers = 1;
} else if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3407,7 +3413,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
total_errors++;
}
if (total_errors > max_errors) {
- printk(KERN_ERR "btrfs: %d errors while writing supers\n",
+ btrfs_err(root->fs_info, "%d errors while writing supers",
total_errors);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
@@ -3455,10 +3461,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
- if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
btrfs_free_log(NULL, root);
- btrfs_free_log_root_tree(NULL, fs_info);
- }
__btrfs_remove_free_space_cache(root->free_ino_pinned);
__btrfs_remove_free_space_cache(root->free_ino_ctl);
@@ -3563,14 +3567,12 @@ int close_ctree(struct btrfs_root *root)
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
- printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+ btrfs_err(root->fs_info, "commit super ret %d", ret);
}
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
btrfs_error_commit_super(root);
- btrfs_put_block_group_cache(fs_info);
-
kthread_stop(fs_info->transaction_kthread);
kthread_stop(fs_info->cleaner_kthread);
@@ -3580,12 +3582,16 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_qgroup_config(root->fs_info);
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
- printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
+ btrfs_info(root->fs_info, "at unmount delalloc count %lld",
percpu_counter_sum(&fs_info->delalloc_bytes));
}
+ btrfs_sysfs_remove_one(fs_info);
+
del_fs_roots(fs_info);
+ btrfs_put_block_group_cache(fs_info);
+
btrfs_free_block_groups(fs_info);
btrfs_stop_all_workers(fs_info);
@@ -3803,55 +3809,54 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
delayed_refs = &trans->delayed_refs;
spin_lock(&delayed_refs->lock);
- if (delayed_refs->num_entries == 0) {
+ if (atomic_read(&delayed_refs->num_entries) == 0) {
spin_unlock(&delayed_refs->lock);
- printk(KERN_INFO "delayed_refs has NO entry\n");
+ btrfs_info(root->fs_info, "delayed_refs has NO entry");
return ret;
}
- while ((node = rb_first(&delayed_refs->root)) != NULL) {
- struct btrfs_delayed_ref_head *head = NULL;
+ while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+ struct btrfs_delayed_ref_head *head;
bool pin_bytes = false;
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- atomic_set(&ref->refs, 1);
- if (btrfs_delayed_ref_is_head(ref)) {
-
- head = btrfs_delayed_node_to_head(ref);
- if (!mutex_trylock(&head->mutex)) {
- atomic_inc(&ref->refs);
- spin_unlock(&delayed_refs->lock);
-
- /* Need to wait for the delayed ref to run */
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(ref);
-
- spin_lock(&delayed_refs->lock);
- continue;
- }
+ head = rb_entry(node, struct btrfs_delayed_ref_head,
+ href_node);
+ if (!mutex_trylock(&head->mutex)) {
+ atomic_inc(&head->node.refs);
+ spin_unlock(&delayed_refs->lock);
- if (head->must_insert_reserved)
- pin_bytes = true;
- btrfs_free_delayed_extent_op(head->extent_op);
- delayed_refs->num_heads--;
- if (list_empty(&head->cluster))
- delayed_refs->num_heads_ready--;
- list_del_init(&head->cluster);
- }
-
- ref->in_tree = 0;
- rb_erase(&ref->rb_node, &delayed_refs->root);
- delayed_refs->num_entries--;
- spin_unlock(&delayed_refs->lock);
- if (head) {
- if (pin_bytes)
- btrfs_pin_extent(root, ref->bytenr,
- ref->num_bytes, 1);
+ mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
+ btrfs_put_delayed_ref(&head->node);
+ spin_lock(&delayed_refs->lock);
+ continue;
+ }
+ spin_lock(&head->lock);
+ while ((node = rb_first(&head->ref_root)) != NULL) {
+ ref = rb_entry(node, struct btrfs_delayed_ref_node,
+ rb_node);
+ ref->in_tree = 0;
+ rb_erase(&ref->rb_node, &head->ref_root);
+ atomic_dec(&delayed_refs->num_entries);
+ btrfs_put_delayed_ref(ref);
}
- btrfs_put_delayed_ref(ref);
+ if (head->must_insert_reserved)
+ pin_bytes = true;
+ btrfs_free_delayed_extent_op(head->extent_op);
+ delayed_refs->num_heads--;
+ if (head->processing == 0)
+ delayed_refs->num_heads_ready--;
+ atomic_dec(&delayed_refs->num_entries);
+ head->node.in_tree = 0;
+ rb_erase(&head->href_node, &delayed_refs->href_root);
+ spin_unlock(&head->lock);
+ spin_unlock(&delayed_refs->lock);
+ mutex_unlock(&head->mutex);
+ if (pin_bytes)
+ btrfs_pin_extent(root, head->node.bytenr,
+ head->node.num_bytes, 1);
+ btrfs_put_delayed_ref(&head->node);
cond_resched();
spin_lock(&delayed_refs->lock);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9c01509dd8ab..32312e09f0f5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,6 +35,7 @@
#include "locking.h"
#include "free-space-cache.h"
#include "math.h"
+#include "sysfs.h"
#undef SCRAMBLE_DELAYED_REFS
@@ -441,7 +442,8 @@ next:
if (ret)
break;
- if (need_resched()) {
+ if (need_resched() ||
+ rwsem_is_contended(&fs_info->extent_commit_sem)) {
caching_ctl->progress = last;
btrfs_release_path(path);
up_read(&fs_info->extent_commit_sem);
@@ -855,12 +857,14 @@ again:
btrfs_put_delayed_ref(&head->node);
goto search_again;
}
+ spin_lock(&head->lock);
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
else
BUG_ON(num_refs == 0);
num_refs += head->node.ref_mod;
+ spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
}
spin_unlock(&delayed_refs->lock);
@@ -1070,11 +1074,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
__le64 lenum;
lenum = cpu_to_le64(root_objectid);
- high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
+ high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(owner);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
lenum = cpu_to_le64(offset);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
+ low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
return ((u64)high_crc << 31) ^ (u64)low_crc;
}
@@ -2285,64 +2289,62 @@ static noinline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
struct rb_node *node;
- struct btrfs_delayed_ref_node *ref;
- int action = BTRFS_ADD_DELAYED_REF;
-again:
+ struct btrfs_delayed_ref_node *ref, *last = NULL;;
+
/*
* select delayed ref of type BTRFS_ADD_DELAYED_REF first.
* this prevents ref count from going down to zero when
* there still are pending delayed ref.
*/
- node = rb_prev(&head->node.rb_node);
- while (1) {
- if (!node)
- break;
+ node = rb_first(&head->ref_root);
+ while (node) {
ref = rb_entry(node, struct btrfs_delayed_ref_node,
rb_node);
- if (ref->bytenr != head->node.bytenr)
- break;
- if (ref->action == action)
+ if (ref->action == BTRFS_ADD_DELAYED_REF)
return ref;
- node = rb_prev(node);
- }
- if (action == BTRFS_ADD_DELAYED_REF) {
- action = BTRFS_DROP_DELAYED_REF;
- goto again;
+ else if (last == NULL)
+ last = ref;
+ node = rb_next(node);
}
- return NULL;
+ return last;
}
/*
* Returns 0 on success or if called with an already aborted transaction.
* Returns -ENOMEM or -EIO on failure and will abort the transaction.
*/
-static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct list_head *cluster)
+static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ unsigned long nr)
{
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_fs_info *fs_info = root->fs_info;
+ ktime_t start = ktime_get();
int ret;
- int count = 0;
+ unsigned long count = 0;
+ unsigned long actual_count = 0;
int must_insert_reserved = 0;
delayed_refs = &trans->transaction->delayed_refs;
while (1) {
if (!locked_ref) {
- /* pick a new head ref from the cluster list */
- if (list_empty(cluster))
+ if (count >= nr)
break;
- locked_ref = list_entry(cluster->next,
- struct btrfs_delayed_ref_head, cluster);
+ spin_lock(&delayed_refs->lock);
+ locked_ref = btrfs_select_ref_head(trans);
+ if (!locked_ref) {
+ spin_unlock(&delayed_refs->lock);
+ break;
+ }
/* grab the lock that says we are going to process
* all the refs for this head */
ret = btrfs_delayed_ref_lock(trans, locked_ref);
-
+ spin_unlock(&delayed_refs->lock);
/*
* we may have dropped the spin lock to get the head
* mutex lock, and that might have given someone else
@@ -2363,6 +2365,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
* finish. If we merged anything we need to re-loop so we can
* get a good ref.
*/
+ spin_lock(&locked_ref->lock);
btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
locked_ref);
@@ -2374,17 +2377,15 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ref && ref->seq &&
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
- /*
- * there are still refs with lower seq numbers in the
- * process of being added. Don't run this ref yet.
- */
- list_del_init(&locked_ref->cluster);
+ spin_unlock(&locked_ref->lock);
btrfs_delayed_ref_unlock(locked_ref);
- locked_ref = NULL;
+ spin_lock(&delayed_refs->lock);
+ locked_ref->processing = 0;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
+ locked_ref = NULL;
cond_resched();
- spin_lock(&delayed_refs->lock);
+ count++;
continue;
}
@@ -2399,6 +2400,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
locked_ref->extent_op = NULL;
if (!ref) {
+
+
/* All delayed refs have been processed, Go ahead
* and send the head node to run_one_delayed_ref,
* so that any accounting fixes can happen
@@ -2411,8 +2414,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
if (extent_op) {
- spin_unlock(&delayed_refs->lock);
-
+ spin_unlock(&locked_ref->lock);
ret = run_delayed_extent_op(trans, root,
ref, extent_op);
btrfs_free_delayed_extent_op(extent_op);
@@ -2426,19 +2428,39 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
*/
if (must_insert_reserved)
locked_ref->must_insert_reserved = 1;
+ locked_ref->processing = 0;
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
- spin_lock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(locked_ref);
return ret;
}
+ continue;
+ }
- goto next;
+ /*
+ * Need to drop our head ref lock and re-acquire the
+ * delayed ref lock and then re-check to make sure
+ * nobody got added.
+ */
+ spin_unlock(&locked_ref->lock);
+ spin_lock(&delayed_refs->lock);
+ spin_lock(&locked_ref->lock);
+ if (rb_first(&locked_ref->ref_root)) {
+ spin_unlock(&locked_ref->lock);
+ spin_unlock(&delayed_refs->lock);
+ continue;
}
+ ref->in_tree = 0;
+ delayed_refs->num_heads--;
+ rb_erase(&locked_ref->href_node,
+ &delayed_refs->href_root);
+ spin_unlock(&delayed_refs->lock);
+ } else {
+ actual_count++;
+ ref->in_tree = 0;
+ rb_erase(&ref->rb_node, &locked_ref->ref_root);
}
+ atomic_dec(&delayed_refs->num_entries);
- ref->in_tree = 0;
- rb_erase(&ref->rb_node, &delayed_refs->root);
- delayed_refs->num_entries--;
if (!btrfs_delayed_ref_is_head(ref)) {
/*
* when we play the delayed ref, also correct the
@@ -2455,20 +2477,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
default:
WARN_ON(1);
}
- } else {
- list_del_init(&locked_ref->cluster);
}
- spin_unlock(&delayed_refs->lock);
+ spin_unlock(&locked_ref->lock);
ret = run_one_delayed_ref(trans, root, ref, extent_op,
must_insert_reserved);
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
+ locked_ref->processing = 0;
btrfs_delayed_ref_unlock(locked_ref);
btrfs_put_delayed_ref(ref);
btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
- spin_lock(&delayed_refs->lock);
return ret;
}
@@ -2484,11 +2504,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
btrfs_put_delayed_ref(ref);
count++;
-next:
cond_resched();
+ }
+
+ /*
+ * We don't want to include ref heads since we can have empty ref heads
+ * and those will drastically skew our runtime down since we just do
+ * accounting, no actual extent tree updates.
+ */
+ if (actual_count > 0) {
+ u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
+ u64 avg;
+
+ /*
+ * We weigh the current average higher than our current runtime
+ * to avoid large swings in the average.
+ */
spin_lock(&delayed_refs->lock);
+ avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
+ avg = div64_u64(avg, 4);
+ fs_info->avg_delayed_ref_runtime = avg;
+ spin_unlock(&delayed_refs->lock);
}
- return count;
+ return 0;
}
#ifdef SCRAMBLE_DELAYED_REFS
@@ -2570,16 +2608,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
return ret;
}
-static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
- int count)
-{
- int val = atomic_read(&delayed_refs->ref_seq);
-
- if (val < seq || val >= seq + count)
- return 1;
- return 0;
-}
-
static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
{
u64 num_bytes;
@@ -2596,7 +2624,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
}
-int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_block_rsv *global_rsv;
@@ -2625,6 +2653,22 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
return ret;
}
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{ /* Returns 1 when num_entries * avg_runtime reaches NSEC_PER_SEC; otherwise returns whatever btrfs_check_space_for_delayed_refs() returns. */
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ u64 num_entries =
+ atomic_read(&trans->transaction->delayed_refs.num_entries);
+ u64 avg_runtime; /* nanoseconds: the stored average is built from ktime_to_ns() values where delayed refs are run */
+
+ smp_mb(); /* NOTE(review): presumably orders the reads against the writer of avg_delayed_ref_runtime -- confirm the pairing */
+ avg_runtime = fs_info->avg_delayed_ref_runtime;
+ if (num_entries * avg_runtime >= NSEC_PER_SEC) /* queued entry count times averaged runtime is one second or more */
+ return 1;
+
+ return btrfs_check_space_for_delayed_refs(trans, root); /* time estimate is below the limit: defer to the space check */
+}
+
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -2640,13 +2684,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
{
struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_node *ref;
- struct list_head cluster;
+ struct btrfs_delayed_ref_head *head;
int ret;
- u64 delayed_start;
int run_all = count == (unsigned long)-1;
int run_most = 0;
- int loops;
/* We'll clean this up in btrfs_cleanup_transaction */
if (trans->aborted)
@@ -2658,130 +2699,40 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
delayed_refs = &trans->transaction->delayed_refs;
- INIT_LIST_HEAD(&cluster);
if (count == 0) {
- count = delayed_refs->num_entries * 2;
+ count = atomic_read(&delayed_refs->num_entries) * 2;
run_most = 1;
}
- if (!run_all && !run_most) {
- int old;
- int seq = atomic_read(&delayed_refs->ref_seq);
-
-progress:
- old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
- if (old) {
- DEFINE_WAIT(__wait);
- if (delayed_refs->flushing ||
- !btrfs_should_throttle_delayed_refs(trans, root))
- return 0;
-
- prepare_to_wait(&delayed_refs->wait, &__wait,
- TASK_UNINTERRUPTIBLE);
-
- old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
- if (old) {
- schedule();
- finish_wait(&delayed_refs->wait, &__wait);
-
- if (!refs_newer(delayed_refs, seq, 256))
- goto progress;
- else
- return 0;
- } else {
- finish_wait(&delayed_refs->wait, &__wait);
- goto again;
- }
- }
-
- } else {
- atomic_inc(&delayed_refs->procs_running_refs);
- }
-
again:
- loops = 0;
- spin_lock(&delayed_refs->lock);
-
#ifdef SCRAMBLE_DELAYED_REFS
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
-
- while (1) {
- if (!(run_all || run_most) &&
- !btrfs_should_throttle_delayed_refs(trans, root))
- break;
-
- /*
- * go find something we can process in the rbtree. We start at
- * the beginning of the tree, and then build a cluster
- * of refs to process starting at the first one we are able to
- * lock
- */
- delayed_start = delayed_refs->run_delayed_start;
- ret = btrfs_find_ref_cluster(trans, &cluster,
- delayed_refs->run_delayed_start);
- if (ret)
- break;
-
- ret = run_clustered_refs(trans, root, &cluster);
- if (ret < 0) {
- btrfs_release_ref_cluster(&cluster);
- spin_unlock(&delayed_refs->lock);
- btrfs_abort_transaction(trans, root, ret);
- atomic_dec(&delayed_refs->procs_running_refs);
- wake_up(&delayed_refs->wait);
- return ret;
- }
-
- atomic_add(ret, &delayed_refs->ref_seq);
-
- count -= min_t(unsigned long, ret, count);
-
- if (count == 0)
- break;
-
- if (delayed_start >= delayed_refs->run_delayed_start) {
- if (loops == 0) {
- /*
- * btrfs_find_ref_cluster looped. let's do one
- * more cycle. if we don't run any delayed ref
- * during that cycle (because we can't because
- * all of them are blocked), bail out.
- */
- loops = 1;
- } else {
- /*
- * no runnable refs left, stop trying
- */
- BUG_ON(run_all);
- break;
- }
- }
- if (ret) {
- /* refs were run, let's reset staleness detection */
- loops = 0;
- }
+ ret = __btrfs_run_delayed_refs(trans, root, count);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
}
if (run_all) {
- if (!list_empty(&trans->new_bgs)) {
- spin_unlock(&delayed_refs->lock);
+ if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root);
- spin_lock(&delayed_refs->lock);
- }
- node = rb_first(&delayed_refs->root);
- if (!node)
+ spin_lock(&delayed_refs->lock);
+ node = rb_first(&delayed_refs->href_root);
+ if (!node) {
+ spin_unlock(&delayed_refs->lock);
goto out;
+ }
count = (unsigned long)-1;
while (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- if (btrfs_delayed_ref_is_head(ref)) {
- struct btrfs_delayed_ref_head *head;
+ head = rb_entry(node, struct btrfs_delayed_ref_head,
+ href_node);
+ if (btrfs_delayed_ref_is_head(&head->node)) {
+ struct btrfs_delayed_ref_node *ref;
- head = btrfs_delayed_node_to_head(ref);
+ ref = &head->node;
atomic_inc(&ref->refs);
spin_unlock(&delayed_refs->lock);
@@ -2795,20 +2746,16 @@ again:
btrfs_put_delayed_ref(ref);
cond_resched();
goto again;
+ } else {
+ WARN_ON(1);
}
node = rb_next(node);
}
spin_unlock(&delayed_refs->lock);
- schedule_timeout(1);
+ cond_resched();
goto again;
}
out:
- atomic_dec(&delayed_refs->procs_running_refs);
- smp_mb();
- if (waitqueue_active(&delayed_refs->wait))
- wake_up(&delayed_refs->wait);
-
- spin_unlock(&delayed_refs->lock);
assert_qgroups_uptodate(trans);
return 0;
}
@@ -2850,12 +2797,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
struct rb_node *node;
int ret = 0;
- ret = -ENOENT;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(trans, bytenr);
- if (!head)
- goto out;
+ if (!head) {
+ spin_unlock(&delayed_refs->lock);
+ return 0;
+ }
if (!mutex_trylock(&head->mutex)) {
atomic_inc(&head->node.refs);
@@ -2872,40 +2820,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(&head->node);
return -EAGAIN;
}
+ spin_unlock(&delayed_refs->lock);
- node = rb_prev(&head->node.rb_node);
- if (!node)
- goto out_unlock;
-
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-
- if (ref->bytenr != bytenr)
- goto out_unlock;
-
- ret = 1;
- if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
- goto out_unlock;
+ spin_lock(&head->lock);
+ node = rb_first(&head->ref_root);
+ while (node) {
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ node = rb_next(node);
- data_ref = btrfs_delayed_node_to_data_ref(ref);
+ /* If it's a shared ref we know a cross reference exists */
+ if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
+ ret = 1;
+ break;
+ }
- node = rb_prev(node);
- if (node) {
- int seq = ref->seq;
+ data_ref = btrfs_delayed_node_to_data_ref(ref);
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (ref->bytenr == bytenr && ref->seq == seq)
- goto out_unlock;
+ /*
+ * If our ref doesn't match the one we're currently looking at
+ * then we have a cross reference.
+ */
+ if (data_ref->root != root->root_key.objectid ||
+ data_ref->objectid != objectid ||
+ data_ref->offset != offset) {
+ ret = 1;
+ break;
+ }
}
-
- if (data_ref->root != root->root_key.objectid ||
- data_ref->objectid != objectid || data_ref->offset != offset)
- goto out_unlock;
-
- ret = 0;
-out_unlock:
+ spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
-out:
- spin_unlock(&delayed_refs->lock);
return ret;
}
@@ -3402,6 +3345,23 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
return readonly;
}
+static const char *alloc_name(u64 flags)
+{ /* Maps a block-group type mask to a constant name string; update_space_info() passes the result as the space_info kobject name. */
+ switch (flags) {
+ case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA: /* both type bits set */
+ return "mixed";
+ case BTRFS_BLOCK_GROUP_METADATA:
+ return "metadata";
+ case BTRFS_BLOCK_GROUP_DATA:
+ return "data";
+ case BTRFS_BLOCK_GROUP_SYSTEM:
+ return "system";
+ default: /* any other value: warn and hand back a placeholder name */
+ WARN_ON(1);
+ return "invalid-combination";
+ };
+}
+
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
struct btrfs_space_info **space_info)
@@ -3439,8 +3399,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
return ret;
}
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
INIT_LIST_HEAD(&found->block_groups[i]);
+ kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
+ }
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -3457,11 +3419,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->chunk_alloc = 0;
found->flush = 0;
init_waitqueue_head(&found->wait);
+
+ ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
+ info->space_info_kobj, "%s",
+ alloc_name(found->flags));
+ if (ret) {
+ kfree(found);
+ return ret;
+ }
+
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
if (flags & BTRFS_BLOCK_GROUP_DATA)
info->data_sinfo = found;
- return 0;
+
+ return ret;
}
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
@@ -4637,7 +4609,7 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
u64 num_bytes)
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
- if (global_rsv->full || global_rsv == block_rsv ||
+ if (global_rsv == block_rsv ||
block_rsv->space_info != global_rsv->space_info)
global_rsv = NULL;
block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
@@ -5916,24 +5888,16 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
{
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_node *ref;
- struct rb_node *node;
int ret = 0;
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(trans, bytenr);
if (!head)
- goto out;
+ goto out_delayed_unlock;
- node = rb_prev(&head->node.rb_node);
- if (!node)
- goto out;
-
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-
- /* there are still entries for this ref, we can't drop it */
- if (ref->bytenr == bytenr)
+ spin_lock(&head->lock);
+ if (rb_first(&head->ref_root))
goto out;
if (head->extent_op) {
@@ -5955,19 +5919,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
* ahead and process it.
*/
head->node.in_tree = 0;
- rb_erase(&head->node.rb_node, &delayed_refs->root);
+ rb_erase(&head->href_node, &delayed_refs->href_root);
- delayed_refs->num_entries--;
+ atomic_dec(&delayed_refs->num_entries);
/*
* we don't take a ref on the node because we're removing it from the
* tree, so we just steal the ref the tree was holding.
*/
delayed_refs->num_heads--;
- if (list_empty(&head->cluster))
+ if (head->processing == 0)
delayed_refs->num_heads_ready--;
-
- list_del_init(&head->cluster);
+ head->processing = 0;
+ spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
BUG_ON(head->extent_op);
@@ -5978,6 +5942,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(&head->node);
return ret;
out:
+ spin_unlock(&head->lock);
+
+out_delayed_unlock:
spin_unlock(&delayed_refs->lock);
return 0;
}
@@ -6145,11 +6112,29 @@ int __get_raid_index(u64 flags)
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}
-static int get_block_group_index(struct btrfs_block_group_cache *cache)
+int get_block_group_index(struct btrfs_block_group_cache *cache)
{
return __get_raid_index(cache->flags);
}
+static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = { /* name strings indexed by the BTRFS_RAID_* constants */
+ [BTRFS_RAID_RAID10] = "raid10",
+ [BTRFS_RAID_RAID1] = "raid1",
+ [BTRFS_RAID_DUP] = "dup",
+ [BTRFS_RAID_RAID0] = "raid0",
+ [BTRFS_RAID_SINGLE] = "single",
+ [BTRFS_RAID_RAID5] = "raid5",
+ [BTRFS_RAID_RAID6] = "raid6",
+};
+
+static const char *get_raid_name(enum btrfs_raid_types type)
+{ /* Returns the table entry for type, or NULL when type is not below BTRFS_NR_RAID_TYPES. */
+ if (type >= BTRFS_NR_RAID_TYPES)
+ return NULL;
+
+ return btrfs_raid_type_names[type]; /* __link_block_group() uses this string as the per-RAID-type kobject name */
+}
+
enum btrfs_loop_type {
LOOP_CACHING_NOWAIT = 0,
LOOP_CACHING_WAIT = 1,
@@ -6177,7 +6162,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
struct btrfs_root *root = orig_root->fs_info->extent_root;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group_cache *block_group = NULL;
- struct btrfs_block_group_cache *used_block_group;
u64 search_start = 0;
u64 max_extent_size = 0;
int empty_cluster = 2 * 1024 * 1024;
@@ -6186,7 +6170,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
int index = __get_raid_index(flags);
int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
- bool found_uncached_bg = false;
bool failed_cluster_refill = false;
bool failed_alloc = false;
bool use_cluster = true;
@@ -6239,7 +6222,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
if (search_start == hint_byte) {
block_group = btrfs_lookup_block_group(root->fs_info,
search_start);
- used_block_group = block_group;
/*
* we don't want to use the block group if it doesn't match our
* allocation bits, or if it's not cached.
@@ -6276,7 +6258,6 @@ search:
u64 offset;
int cached;
- used_block_group = block_group;
btrfs_get_block_group(block_group);
search_start = block_group->key.objectid;
@@ -6304,7 +6285,6 @@ search:
have_block_group:
cached = block_group_cache_done(block_group);
if (unlikely(!cached)) {
- found_uncached_bg = true;
ret = cache_block_group(block_group, 0);
BUG_ON(ret < 0);
ret = 0;
@@ -6320,6 +6300,7 @@ have_block_group:
* lets look there
*/
if (last_ptr) {
+ struct btrfs_block_group_cache *used_block_group;
unsigned long aligned_cluster;
/*
* the refill lock keeps out other
@@ -6330,10 +6311,8 @@ have_block_group:
if (used_block_group != block_group &&
(!used_block_group ||
used_block_group->ro ||
- !block_group_bits(used_block_group, flags))) {
- used_block_group = block_group;
+ !block_group_bits(used_block_group, flags)))
goto refill_cluster;
- }
if (used_block_group != block_group)
btrfs_get_block_group(used_block_group);
@@ -6347,17 +6326,19 @@ have_block_group:
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
trace_btrfs_reserve_extent_cluster(root,
- block_group, search_start, num_bytes);
+ used_block_group,
+ search_start, num_bytes);
+ if (used_block_group != block_group) {
+ btrfs_put_block_group(block_group);
+ block_group = used_block_group;
+ }
goto checks;
}
WARN_ON(last_ptr->block_group != used_block_group);
- if (used_block_group != block_group) {
+ if (used_block_group != block_group)
btrfs_put_block_group(used_block_group);
- used_block_group = block_group;
- }
refill_cluster:
- BUG_ON(used_block_group != block_group);
/* If we are on LOOP_NO_EMPTY_SIZE, we can't
* set up a new cluster, so let's just skip it
* and let the allocator find whatever block
@@ -6476,25 +6457,25 @@ unclustered_alloc:
goto loop;
}
checks:
- search_start = stripe_align(root, used_block_group,
+ search_start = stripe_align(root, block_group,
offset, num_bytes);
/* move on to the next group */
if (search_start + num_bytes >
- used_block_group->key.objectid + used_block_group->key.offset) {
- btrfs_add_free_space(used_block_group, offset, num_bytes);
+ block_group->key.objectid + block_group->key.offset) {
+ btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
}
if (offset < search_start)
- btrfs_add_free_space(used_block_group, offset,
+ btrfs_add_free_space(block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
- ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
+ ret = btrfs_update_reserved_bytes(block_group, num_bytes,
alloc_type);
if (ret == -EAGAIN) {
- btrfs_add_free_space(used_block_group, offset, num_bytes);
+ btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
}
@@ -6504,16 +6485,12 @@ checks:
trace_btrfs_reserve_extent(orig_root, block_group,
search_start, num_bytes);
- if (used_block_group != block_group)
- btrfs_put_block_group(used_block_group);
btrfs_put_block_group(block_group);
break;
loop:
failed_cluster_refill = false;
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
- if (used_block_group != block_group)
- btrfs_put_block_group(used_block_group);
btrfs_put_block_group(block_group);
}
up_read(&space_info->groups_sem);
@@ -6584,12 +6561,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int index = 0;
spin_lock(&info->lock);
- printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
+ printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
info->flags,
info->total_bytes - info->bytes_used - info->bytes_pinned -
info->bytes_reserved - info->bytes_readonly,
(info->full) ? "" : "not ");
- printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
+ printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
"reserved=%llu, may_use=%llu, readonly=%llu\n",
info->total_bytes, info->bytes_used, info->bytes_pinned,
info->bytes_reserved, info->bytes_may_use,
@@ -6603,7 +6580,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
spin_lock(&cache->lock);
- printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
+ printk(KERN_INFO "BTRFS: "
+ "block group %llu has %llu bytes, "
+ "%llu used %llu pinned %llu reserved %s\n",
cache->key.objectid, cache->key.offset,
btrfs_block_group_used(&cache->item), cache->pinned,
cache->reserved, cache->ro ? "[readonly]" : "");
@@ -6966,7 +6945,7 @@ again:
/*DEFAULT_RATELIMIT_BURST*/ 1);
if (__ratelimit(&_rs))
WARN(1, KERN_DEBUG
- "btrfs: block rsv returned %d\n", ret);
+ "BTRFS: block rsv returned %d\n", ret);
}
try_reserve:
ret = reserve_metadata_bytes(root, block_rsv, blocksize,
@@ -7714,7 +7693,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_end_transaction_throttle(trans, tree_root);
if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
- pr_debug("btrfs: drop snapshot early exit\n");
+ pr_debug("BTRFS: drop snapshot early exit\n");
err = -EAGAIN;
goto out_free;
}
@@ -7779,7 +7758,7 @@ out:
*/
if (!for_reloc && root_dropped == false)
btrfs_add_dead_root(root);
- if (err)
+ if (err && err != -EAGAIN)
btrfs_std_error(root->fs_info, err);
return err;
}
@@ -8333,6 +8312,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
release_global_block_rsv(info);
while (!list_empty(&info->space_info)) {
+ int i;
+
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
@@ -8343,9 +8324,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
dump_space_info(space_info, 0, 0);
}
}
- percpu_counter_destroy(&space_info->total_bytes_pinned);
list_del(&space_info->list);
- kfree(space_info);
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+ struct kobject *kobj;
+ kobj = &space_info->block_group_kobjs[i];
+ if (kobj->parent) {
+ kobject_del(kobj);
+ kobject_put(kobj);
+ }
+ }
+ kobject_del(&space_info->kobj);
+ kobject_put(&space_info->kobj);
}
return 0;
}
@@ -8356,10 +8345,57 @@ static void __link_block_group(struct btrfs_space_info *space_info,
int index = get_block_group_index(cache);
down_write(&space_info->groups_sem);
+ if (list_empty(&space_info->block_groups[index])) {
+ struct kobject *kobj = &space_info->block_group_kobjs[index];
+ int ret;
+
+ kobject_get(&space_info->kobj); /* put in release */
+ ret = kobject_add(kobj, &space_info->kobj, "%s",
+ get_raid_name(index));
+ if (ret) {
+ pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
+ kobject_put(&space_info->kobj);
+ }
+ }
list_add_tail(&cache->list, &space_info->block_groups[index]);
up_write(&space_info->groups_sem);
}
+static struct btrfs_block_group_cache *
+btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
+{ /* Allocates and initialises a block group cache keyed by (start, BTRFS_BLOCK_GROUP_ITEM_KEY, size); returns NULL if either allocation fails. */
+ struct btrfs_block_group_cache *cache;
+
+ cache = kzalloc(sizeof(*cache), GFP_NOFS);
+ if (!cache)
+ return NULL;
+
+ cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+ GFP_NOFS);
+ if (!cache->free_space_ctl) {
+ kfree(cache); /* second allocation failed: release the first so nothing leaks */
+ return NULL;
+ }
+
+ cache->key.objectid = start;
+ cache->key.offset = size;
+ cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+
+ cache->sectorsize = root->sectorsize;
+ cache->fs_info = root->fs_info;
+ cache->full_stripe_len = btrfs_full_stripe_len(root,
+ &root->fs_info->mapping_tree,
+ start);
+ atomic_set(&cache->count, 1); /* count starts at 1; the error paths of both callers release the cache with btrfs_put_block_group() */
+ spin_lock_init(&cache->lock);
+ INIT_LIST_HEAD(&cache->list);
+ INIT_LIST_HEAD(&cache->cluster_list);
+ INIT_LIST_HEAD(&cache->new_bg_list);
+ btrfs_init_free_space_ctl(cache);
+
+ return cache; /* cache->item and cache->flags are still zero from kzalloc(); the callers fill them in */
+}
+
int btrfs_read_block_groups(struct btrfs_root *root)
{
struct btrfs_path *path;
@@ -8395,26 +8431,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
break;
if (ret != 0)
goto error;
+
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
+
+ cache = btrfs_create_block_group_cache(root, found_key.objectid,
+ found_key.offset);
if (!cache) {
ret = -ENOMEM;
goto error;
}
- cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
- GFP_NOFS);
- if (!cache->free_space_ctl) {
- kfree(cache);
- ret = -ENOMEM;
- goto error;
- }
-
- atomic_set(&cache->count, 1);
- spin_lock_init(&cache->lock);
- cache->fs_info = info;
- INIT_LIST_HEAD(&cache->list);
- INIT_LIST_HEAD(&cache->cluster_list);
if (need_clear) {
/*
@@ -8435,16 +8461,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
read_extent_buffer(leaf, &cache->item,
btrfs_item_ptr_offset(leaf, path->slots[0]),
sizeof(cache->item));
- memcpy(&cache->key, &found_key, sizeof(found_key));
+ cache->flags = btrfs_block_group_flags(&cache->item);
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(path);
- cache->flags = btrfs_block_group_flags(&cache->item);
- cache->sectorsize = root->sectorsize;
- cache->full_stripe_len = btrfs_full_stripe_len(root,
- &root->fs_info->mapping_tree,
- found_key.objectid);
- btrfs_init_free_space_ctl(cache);
/*
* We need to exclude the super stripes now so that the space
@@ -8458,8 +8478,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* case.
*/
free_excluded_extents(root, cache);
- kfree(cache->free_space_ctl);
- kfree(cache);
+ btrfs_put_block_group(cache);
goto error;
}
@@ -8590,38 +8609,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
root->fs_info->last_trans_log_full_commit = trans->transid;
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
+ cache = btrfs_create_block_group_cache(root, chunk_offset, size);
if (!cache)
return -ENOMEM;
- cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
- GFP_NOFS);
- if (!cache->free_space_ctl) {
- kfree(cache);
- return -ENOMEM;
- }
-
- cache->key.objectid = chunk_offset;
- cache->key.offset = size;
- cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- cache->sectorsize = root->sectorsize;
- cache->fs_info = root->fs_info;
- cache->full_stripe_len = btrfs_full_stripe_len(root,
- &root->fs_info->mapping_tree,
- chunk_offset);
-
- atomic_set(&cache->count, 1);
- spin_lock_init(&cache->lock);
- INIT_LIST_HEAD(&cache->list);
- INIT_LIST_HEAD(&cache->cluster_list);
- INIT_LIST_HEAD(&cache->new_bg_list);
-
- btrfs_init_free_space_ctl(cache);
btrfs_set_block_group_used(&cache->item, bytes_used);
btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
- cache->flags = type;
btrfs_set_block_group_flags(&cache->item, type);
+ cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
ret = exclude_super_stripes(root, cache);
@@ -8631,8 +8627,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* case.
*/
free_excluded_extents(root, cache);
- kfree(cache->free_space_ctl);
- kfree(cache);
+ btrfs_put_block_group(cache);
return ret;
}
@@ -8796,8 +8791,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* are still on the list after taking the semaphore
*/
list_del_init(&block_group->list);
- if (list_empty(&block_group->space_info->block_groups[index]))
+ if (list_empty(&block_group->space_info->block_groups[index])) {
+ kobject_del(&block_group->space_info->block_group_kobjs[index]);
+ kobject_put(&block_group->space_info->block_group_kobjs[index]);
clear_avail_alloc_bits(root->fs_info, block_group->flags);
+ }
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ff43802a7c88..85bbd01f1271 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -59,7 +59,7 @@ void btrfs_leak_debug_check(void)
while (!list_empty(&states)) {
state = list_entry(states.next, struct extent_state, leak_list);
- printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+ printk(KERN_ERR "BTRFS: state leak: start %llu end %llu "
"state %lu in tree %p refs %d\n",
state->start, state->end, state->state, state->tree,
atomic_read(&state->refs));
@@ -69,7 +69,7 @@ void btrfs_leak_debug_check(void)
while (!list_empty(&buffers)) {
eb = list_entry(buffers.next, struct extent_buffer, leak_list);
- printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+ printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
"refs %d\n",
eb->start, eb->len, atomic_read(&eb->refs));
list_del(&eb->leak_list);
@@ -77,16 +77,22 @@ void btrfs_leak_debug_check(void)
}
}
-#define btrfs_debug_check_extent_io_range(inode, start, end) \
- __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end))
+#define btrfs_debug_check_extent_io_range(tree, start, end) \
+ __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
- struct inode *inode, u64 start, u64 end)
+ struct extent_io_tree *tree, u64 start, u64 end)
{
- u64 isize = i_size_read(inode);
+ struct inode *inode;
+ u64 isize;
+
+ if (!tree->mapping)
+ return;
+ inode = tree->mapping->host;
+ isize = i_size_read(inode);
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
printk_ratelimited(KERN_DEBUG
- "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
+ "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
caller, btrfs_ino(inode), isize, start, end);
}
}
@@ -124,6 +130,8 @@ static noinline void flush_write_bio(void *data);
static inline struct btrfs_fs_info *
tree_fs_info(struct extent_io_tree *tree)
{
+ if (!tree->mapping)
+ return NULL;
return btrfs_sb(tree->mapping->host->i_sb);
}
@@ -186,11 +194,9 @@ void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping)
{
tree->state = RB_ROOT;
- INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
tree->ops = NULL;
tree->dirty_bytes = 0;
spin_lock_init(&tree->lock);
- spin_lock_init(&tree->buffer_lock);
tree->mapping = mapping;
}
@@ -224,12 +230,20 @@ void free_extent_state(struct extent_state *state)
}
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
- struct rb_node *node)
+ struct rb_node *node,
+ struct rb_node ***p_in,
+ struct rb_node **parent_in)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct tree_entry *entry;
+ if (p_in && parent_in) {
+ p = *p_in;
+ parent = *parent_in;
+ goto do_insert;
+ }
+
while (*p) {
parent = *p;
entry = rb_entry(parent, struct tree_entry, rb_node);
@@ -242,35 +256,43 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
return parent;
}
+do_insert:
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
}
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
- struct rb_node **prev_ret,
- struct rb_node **next_ret)
+ struct rb_node **prev_ret,
+ struct rb_node **next_ret,
+ struct rb_node ***p_ret,
+ struct rb_node **parent_ret)
{
struct rb_root *root = &tree->state;
- struct rb_node *n = root->rb_node;
+ struct rb_node **n = &root->rb_node;
struct rb_node *prev = NULL;
struct rb_node *orig_prev = NULL;
struct tree_entry *entry;
struct tree_entry *prev_entry = NULL;
- while (n) {
- entry = rb_entry(n, struct tree_entry, rb_node);
- prev = n;
+ while (*n) {
+ prev = *n;
+ entry = rb_entry(prev, struct tree_entry, rb_node);
prev_entry = entry;
if (offset < entry->start)
- n = n->rb_left;
+ n = &(*n)->rb_left;
else if (offset > entry->end)
- n = n->rb_right;
+ n = &(*n)->rb_right;
else
- return n;
+ return *n;
}
+ if (p_ret)
+ *p_ret = n;
+ if (parent_ret)
+ *parent_ret = prev;
+
if (prev_ret) {
orig_prev = prev;
while (prev && offset > prev_entry->end) {
@@ -292,18 +314,27 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
return NULL;
}
-static inline struct rb_node *tree_search(struct extent_io_tree *tree,
- u64 offset)
+static inline struct rb_node *
+tree_search_for_insert(struct extent_io_tree *tree,
+ u64 offset,
+ struct rb_node ***p_ret,
+ struct rb_node **parent_ret)
{
struct rb_node *prev = NULL;
struct rb_node *ret;
- ret = __etree_search(tree, offset, &prev, NULL);
+ ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
if (!ret)
return prev;
return ret;
}
+static inline struct rb_node *tree_search(struct extent_io_tree *tree,
+ u64 offset)
+{
+ return tree_search_for_insert(tree, offset, NULL, NULL);
+}
+
static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
struct extent_state *other)
{
@@ -385,23 +416,25 @@ static void set_state_bits(struct extent_io_tree *tree,
*/
static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
+ struct rb_node ***p,
+ struct rb_node **parent,
unsigned long *bits)
{
struct rb_node *node;
if (end < start)
- WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
+ WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
end, start);
state->start = start;
state->end = end;
set_state_bits(tree, state, bits);
- node = tree_insert(&tree->state, end, &state->rb_node);
+ node = tree_insert(&tree->state, end, &state->rb_node, p, parent);
if (node) {
struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node);
- printk(KERN_ERR "btrfs found node %llu %llu on insert of "
+ printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
"%llu %llu\n",
found->start, found->end, start, end);
return -EEXIST;
@@ -444,7 +477,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
prealloc->state = orig->state;
orig->start = split;
- node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+ node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node,
+ NULL, NULL);
if (node) {
free_extent_state(prealloc);
return -EEXIST;
@@ -542,7 +576,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int err;
int clear = 0;
- btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ btrfs_debug_check_extent_io_range(tree, start, end);
if (bits & EXTENT_DELALLOC)
bits |= EXTENT_NORESERVE;
@@ -702,7 +736,7 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct rb_node *node;
- btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ btrfs_debug_check_extent_io_range(tree, start, end);
spin_lock(&tree->lock);
again:
@@ -783,11 +817,13 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct extent_state *prealloc = NULL;
struct rb_node *node;
+ struct rb_node **p;
+ struct rb_node *parent;
int err = 0;
u64 last_start;
u64 last_end;
- btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ btrfs_debug_check_extent_io_range(tree, start, end);
bits |= EXTENT_FIRST_DELALLOC;
again:
@@ -809,14 +845,16 @@ again:
* this search will find all the extents that end after
* our range starts.
*/
- node = tree_search(tree, start);
+ node = tree_search_for_insert(tree, start, &p, &parent);
if (!node) {
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
- err = insert_state(tree, prealloc, start, end, &bits);
+ err = insert_state(tree, prealloc, start, end,
+ &p, &parent, &bits);
if (err)
extent_io_tree_panic(tree, err);
+ cache_state(prealloc, cached_state);
prealloc = NULL;
goto out;
}
@@ -919,7 +957,7 @@ hit_next:
* the later extent.
*/
err = insert_state(tree, prealloc, start, this_end,
- &bits);
+ NULL, NULL, &bits);
if (err)
extent_io_tree_panic(tree, err);
@@ -1005,11 +1043,13 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct extent_state *prealloc = NULL;
struct rb_node *node;
+ struct rb_node **p;
+ struct rb_node *parent;
int err = 0;
u64 last_start;
u64 last_end;
- btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+ btrfs_debug_check_extent_io_range(tree, start, end);
again:
if (!prealloc && (mask & __GFP_WAIT)) {
@@ -1032,17 +1072,19 @@ again:
* this search will find all the extents that end after
* our range starts.
*/
- node = tree_search(tree, start);
+ node = tree_search_for_insert(tree, start, &p, &parent);
if (!node) {
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc) {
err = -ENOMEM;
goto out;
}
- err = insert_state(tree, prealloc, start, end, &bits);
- prealloc = NULL;
+ err = insert_state(tree, prealloc, start, end,
+ &p, &parent, &bits);
if (err)
extent_io_tree_panic(tree, err);
+ cache_state(prealloc, cached_state);
+ prealloc = NULL;
goto out;
}
state = rb_entry(node, struct extent_state, rb_node);
@@ -1135,7 +1177,7 @@ hit_next:
* the later extent.
*/
err = insert_state(tree, prealloc, start, this_end,
- &bits);
+ NULL, NULL, &bits);
if (err)
extent_io_tree_panic(tree, err);
cache_state(prealloc, cached_state);
@@ -1984,7 +2026,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
- bio->bi_size = 0;
+ bio->bi_iter.bi_size = 0;
map_length = length;
ret = btrfs_map_block(fs_info, WRITE, logical,
@@ -1995,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
}
BUG_ON(mirror_num != bbio->mirror_num);
sector = bbio->stripes[mirror_num-1].physical >> 9;
- bio->bi_sector = sector;
+ bio->bi_iter.bi_sector = sector;
dev = bbio->stripes[mirror_num-1].dev;
kfree(bbio);
if (!dev || !dev->bdev || !dev->writeable) {
@@ -2012,9 +2054,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
return -EIO;
}
- printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
- "(dev %s sector %llu)\n", page->mapping->host->i_ino,
- start, rcu_str_deref(dev->name), sector);
+ printk_ratelimited_in_rcu(KERN_INFO
+ "BTRFS: read error corrected: ino %lu off %llu "
+ "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+ start, rcu_str_deref(dev->name), sector);
bio_put(bio);
return 0;
@@ -2156,7 +2199,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
return -EIO;
}
- if (em->start > start || em->start + em->len < start) {
+ if (em->start > start || em->start + em->len <= start) {
free_extent_map(em);
em = NULL;
}
@@ -2268,9 +2311,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
return -EIO;
}
bio->bi_end_io = failed_bio->bi_end_io;
- bio->bi_sector = failrec->logical >> 9;
+ bio->bi_iter.bi_sector = failrec->logical >> 9;
bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- bio->bi_size = 0;
+ bio->bi_iter.bi_size = 0;
btrfs_failed_bio = btrfs_io_bio(failed_bio);
if (btrfs_failed_bio->csum) {
@@ -2332,37 +2375,39 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
*/
static void end_bio_extent_writepage(struct bio *bio, int err)
{
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct extent_io_tree *tree;
+ struct bio_vec *bvec;
u64 start;
u64 end;
+ int i;
- do {
+ bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
/* We always issue full-page reads, but if some block
* in a page fails to read, blk_update_request() will
* advance bv_offset and adjust bv_len to compensate.
* Print a warning for nonzero offsets, and an error
* if they don't add up to a full page. */
- if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
- printk("%s page write in btrfs with offset %u and length %u\n",
- bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
- ? KERN_ERR "partial" : KERN_INFO "incomplete",
- bvec->bv_offset, bvec->bv_len);
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+ if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+ btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+ "partial page write in btrfs with offset %u and length %u",
+ bvec->bv_offset, bvec->bv_len);
+ else
+ btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+ "incomplete page write in btrfs with offset %u and "
+ "length %u",
+ bvec->bv_offset, bvec->bv_len);
+ }
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
if (end_extent_writepage(page, err, start, end))
continue;
end_page_writeback(page);
- } while (bvec >= bio->bi_io_vec);
+ }
bio_put(bio);
}
@@ -2392,9 +2437,8 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
*/
static void end_bio_extent_readpage(struct bio *bio, int err)
{
+ struct bio_vec *bvec;
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct bio_vec *bvec = bio->bi_io_vec;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
u64 offset = 0;
@@ -2405,16 +2449,17 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
u64 extent_len = 0;
int mirror;
int ret;
+ int i;
if (err)
uptodate = 0;
- do {
+ bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
struct inode *inode = page->mapping->host;
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
- "mirror=%lu\n", (u64)bio->bi_sector, err,
+ "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err,
io_bio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
@@ -2423,19 +2468,22 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* advance bv_offset and adjust bv_len to compensate.
* Print a warning for nonzero offsets, and an error
* if they don't add up to a full page. */
- if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
- printk("%s page read in btrfs with offset %u and length %u\n",
- bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
- ? KERN_ERR "partial" : KERN_INFO "incomplete",
- bvec->bv_offset, bvec->bv_len);
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+ if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+ btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+ "partial page read in btrfs with offset %u and length %u",
+ bvec->bv_offset, bvec->bv_len);
+ else
+ btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+ "incomplete page read in btrfs with offset %u and "
+ "length %u",
+ bvec->bv_offset, bvec->bv_len);
+ }
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
len = bvec->bv_len;
- if (++bvec <= bvec_end)
- prefetchw(&bvec->bv_page->flags);
-
mirror = io_bio->mirror_num;
if (likely(uptodate && tree->ops &&
tree->ops->readpage_end_io_hook)) {
@@ -2516,7 +2564,7 @@ readpage_ok:
extent_start = start;
extent_len = end + 1 - start;
}
- } while (bvec <= bvec_end);
+ }
if (extent_len)
endio_readpage_release_extent(tree, extent_start, extent_len,
@@ -2547,9 +2595,8 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
}
if (bio) {
- bio->bi_size = 0;
bio->bi_bdev = bdev;
- bio->bi_sector = first_sector;
+ bio->bi_iter.bi_sector = first_sector;
btrfs_bio = btrfs_io_bio(bio);
btrfs_bio->csum = NULL;
btrfs_bio->csum_allocated = NULL;
@@ -2643,7 +2690,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (bio_ret && *bio_ret) {
bio = *bio_ret;
if (old_compressed)
- contig = bio->bi_sector == sector;
+ contig = bio->bi_iter.bi_sector == sector;
else
contig = bio_end_sector(bio) == sector;
@@ -3287,8 +3334,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
- printk(KERN_ERR "btrfs warning page %lu not "
- "writeback, cur %llu end %llu\n",
+ btrfs_err(BTRFS_I(inode)->root->fs_info,
+ "page %lu not writeback, cur %llu end %llu",
page->index, cur, end);
}
@@ -3410,20 +3457,18 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
{
- int uptodate = err == 0;
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct bio_vec *bvec;
struct extent_buffer *eb;
- int done;
+ int i, done;
- do {
+ bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- bvec--;
eb = (struct extent_buffer *)page->private;
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+ if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
ClearPageUptodate(page);
SetPageError(page);
@@ -3435,10 +3480,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
continue;
end_extent_buffer_writeback(eb);
- } while (bvec >= bio->bi_io_vec);
+ }
bio_put(bio);
-
}
static int write_one_eb(struct extent_buffer *eb,
@@ -3447,6 +3491,7 @@ static int write_one_eb(struct extent_buffer *eb,
struct extent_page_data *epd)
{
struct block_device *bdev = fs_info->fs_devices->latest_bdev;
+ struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
@@ -3464,7 +3509,7 @@ static int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
- ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
+ ret = submit_extent_page(rw, tree, p, offset >> 9,
PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
-1, end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags);
@@ -4082,12 +4127,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
- struct btrfs_file_extent_item *item;
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
u64 em_end = 0;
- unsigned long emflags;
if (len == 0)
return -EINVAL;
@@ -4112,8 +4155,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
WARN_ON(!ret);
path->slots[0]--;
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_file_extent_item);
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
@@ -4181,7 +4222,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
offset_in_extent = em_start - em->start;
em_end = extent_map_end(em);
em_len = em_end - em_start;
- emflags = em->flags;
disko = 0;
flags = 0;
@@ -4333,10 +4373,9 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
__free_extent_buffer(eb);
}
-static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
- u64 start,
- unsigned long len,
- gfp_t mask)
+static struct extent_buffer *
+__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
+ unsigned long len, gfp_t mask)
{
struct extent_buffer *eb = NULL;
@@ -4345,7 +4384,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return NULL;
eb->start = start;
eb->len = len;
- eb->tree = tree;
+ eb->fs_info = fs_info;
eb->bflags = 0;
rwlock_init(&eb->lock);
atomic_set(&eb->write_locks, 0);
@@ -4477,13 +4516,14 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
}
}
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start)
+struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 start)
{
struct extent_buffer *eb;
rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ eb = radix_tree_lookup(&fs_info->buffer_radix,
+ start >> PAGE_CACHE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
mark_extent_buffer_accessed(eb);
@@ -4494,7 +4534,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
return NULL;
}
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len)
{
unsigned long num_pages = num_extent_pages(start, len);
@@ -4503,16 +4543,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct extent_buffer *eb;
struct extent_buffer *exists = NULL;
struct page *p;
- struct address_space *mapping = tree->mapping;
+ struct address_space *mapping = fs_info->btree_inode->i_mapping;
int uptodate = 1;
int ret;
-
- eb = find_extent_buffer(tree, start);
+ eb = find_extent_buffer(fs_info, start);
if (eb)
return eb;
- eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
+ eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
if (!eb)
return NULL;
@@ -4567,12 +4606,13 @@ again:
if (ret)
goto free_eb;
- spin_lock(&tree->buffer_lock);
- ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
- spin_unlock(&tree->buffer_lock);
+ spin_lock(&fs_info->buffer_lock);
+ ret = radix_tree_insert(&fs_info->buffer_radix,
+ start >> PAGE_CACHE_SHIFT, eb);
+ spin_unlock(&fs_info->buffer_lock);
radix_tree_preload_end();
if (ret == -EEXIST) {
- exists = find_extent_buffer(tree, start);
+ exists = find_extent_buffer(fs_info, start);
if (exists)
goto free_eb;
else
@@ -4580,6 +4620,7 @@ again:
}
/* add one reference for the tree */
check_buffer_tree_ref(eb);
+ set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
/*
* there is a race where release page may have
@@ -4623,17 +4664,17 @@ static int release_extent_buffer(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
- if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
- spin_unlock(&eb->refs_lock);
- } else {
- struct extent_io_tree *tree = eb->tree;
+ if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
+ struct btrfs_fs_info *fs_info = eb->fs_info;
spin_unlock(&eb->refs_lock);
- spin_lock(&tree->buffer_lock);
- radix_tree_delete(&tree->buffer,
+ spin_lock(&fs_info->buffer_lock);
+ radix_tree_delete(&fs_info->buffer_radix,
eb->start >> PAGE_CACHE_SHIFT);
- spin_unlock(&tree->buffer_lock);
+ spin_unlock(&fs_info->buffer_lock);
+ } else {
+ spin_unlock(&eb->refs_lock);
}
/* Should be safe to release our pages at this point */
@@ -5112,12 +5153,12 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_i;
if (src_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+ printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
"len %lu dst len %lu\n", src_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+ printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
"len %lu dst len %lu\n", dst_offset, len, dst->len);
BUG_ON(1);
}
@@ -5159,12 +5200,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_i;
if (src_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+ printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
"len %lu len %lu\n", src_offset, len, dst->len);
BUG_ON(1);
}
if (dst_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+ printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
"len %lu len %lu\n", dst_offset, len, dst->len);
BUG_ON(1);
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 19620c58f096..58b27e5ab521 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -43,6 +43,7 @@
#define EXTENT_BUFFER_WRITEBACK 7
#define EXTENT_BUFFER_IOERR 8
#define EXTENT_BUFFER_DUMMY 9
+#define EXTENT_BUFFER_IN_TREE 10
/* these are flags for extent_clear_unlock_delalloc */
#define PAGE_UNLOCK (1 << 0)
@@ -94,12 +95,10 @@ struct extent_io_ops {
struct extent_io_tree {
struct rb_root state;
- struct radix_tree_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
int track_uptodate;
spinlock_t lock;
- spinlock_t buffer_lock;
struct extent_io_ops *ops;
};
@@ -130,7 +129,7 @@ struct extent_buffer {
unsigned long map_start;
unsigned long map_len;
unsigned long bflags;
- struct extent_io_tree *tree;
+ struct btrfs_fs_info *fs_info;
spinlock_t refs_lock;
atomic_t refs;
atomic_t io_pages;
@@ -266,11 +265,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a4a7a1a8da95..996ad56b57db 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -79,12 +79,21 @@ void free_extent_map(struct extent_map *em)
}
}
-static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
- struct rb_node *node)
+/* simple helper to do math around the end of an extent, handling wrap */
+static u64 range_end(u64 start, u64 len)
+{
+ if (start + len < start)
+ return (u64)-1;
+ return start + len;
+}
+
+static int tree_insert(struct rb_root *root, struct extent_map *em)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
- struct extent_map *entry;
+ struct extent_map *entry = NULL;
+ struct rb_node *orig_parent = NULL;
+ u64 end = range_end(em->start, em->len);
while (*p) {
parent = *p;
@@ -92,19 +101,37 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
WARN_ON(!entry->in_tree);
- if (offset < entry->start)
+ if (em->start < entry->start)
p = &(*p)->rb_left;
- else if (offset >= extent_map_end(entry))
+ else if (em->start >= extent_map_end(entry))
p = &(*p)->rb_right;
else
- return parent;
+ return -EEXIST;
}
- entry = rb_entry(node, struct extent_map, rb_node);
- entry->in_tree = 1;
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
+ orig_parent = parent;
+ while (parent && em->start >= extent_map_end(entry)) {
+ parent = rb_next(parent);
+ entry = rb_entry(parent, struct extent_map, rb_node);
+ }
+ if (parent)
+ if (end > entry->start && em->start < extent_map_end(entry))
+ return -EEXIST;
+
+ parent = orig_parent;
+ entry = rb_entry(parent, struct extent_map, rb_node);
+ while (parent && em->start < entry->start) {
+ parent = rb_prev(parent);
+ entry = rb_entry(parent, struct extent_map, rb_node);
+ }
+ if (parent)
+ if (end > entry->start && em->start < extent_map_end(entry))
+ return -EEXIST;
+
+ em->in_tree = 1;
+ rb_link_node(&em->rb_node, orig_parent, p);
+ rb_insert_color(&em->rb_node, root);
+ return 0;
}
/*
@@ -228,7 +255,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && mergable_maps(em, merge)) {
em->len += merge->len;
- em->block_len += merge->len;
+ em->block_len += merge->block_len;
rb_erase(&merge->rb_node, &tree->map);
merge->in_tree = 0;
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
@@ -310,20 +337,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified)
{
int ret = 0;
- struct rb_node *rb;
- struct extent_map *exist;
- exist = lookup_extent_mapping(tree, em->start, em->len);
- if (exist) {
- free_extent_map(exist);
- ret = -EEXIST;
- goto out;
- }
- rb = tree_insert(&tree->map, em->start, &em->rb_node);
- if (rb) {
- ret = -EEXIST;
+ ret = tree_insert(&tree->map, em);
+ if (ret)
goto out;
- }
+
atomic_inc(&em->refs);
em->mod_start = em->start;
@@ -337,14 +355,6 @@ out:
return ret;
}
-/* simple helper to do math around the end of an extent, handling wrap */
-static u64 range_end(u64 start, u64 len)
-{
- if (start + len < start)
- return (u64)-1;
- return start + len;
-}
-
static struct extent_map *
__lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len, int strict)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 6f3848860283..127555b29f58 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
if (!path)
return -ENOMEM;
- nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
+ nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
@@ -201,7 +201,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
csum = (u8 *)dst;
}
- if (bio->bi_size > PAGE_CACHE_SIZE * 8)
+ if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
path->reada = 2;
WARN_ON(bio->bi_vcnt <= 0);
@@ -217,7 +217,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
path->skip_locking = 1;
}
- disk_bytenr = (u64)bio->bi_sector << 9;
+ disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
if (dio)
offset = logical_offset;
while (bio_index < bio->bi_vcnt) {
@@ -246,8 +246,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
offset + bvec->bv_len - 1,
EXTENT_NODATASUM, GFP_NOFS);
} else {
- printk(KERN_INFO "btrfs no csum found "
- "for inode %llu start %llu\n",
+ btrfs_info(BTRFS_I(inode)->root->fs_info,
+ "no csum found for inode %llu start %llu",
btrfs_ino(inode), offset);
}
item = NULL;
@@ -302,7 +302,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
struct btrfs_dio_private *dip, struct bio *bio,
u64 offset)
{
- int len = (bio->bi_sector << 9) - dip->disk_bytenr;
+ int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
int ret;
@@ -447,11 +447,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
u64 offset;
WARN_ON(bio->bi_vcnt <= 0);
- sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
+ sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_iter.bi_size),
+ GFP_NOFS);
if (!sums)
return -ENOMEM;
- sums->len = bio->bi_size;
+ sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
if (contig)
@@ -461,7 +462,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
- sums->bytenr = (u64)bio->bi_sector << 9;
+ sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
index = 0;
while (bio_index < bio->bi_vcnt) {
@@ -476,7 +477,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
btrfs_add_ordered_sum(inode, ordered, sums);
btrfs_put_ordered_extent(ordered);
- bytes_left = bio->bi_size - total_bytes;
+ bytes_left = bio->bi_iter.bi_size - total_bytes;
sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
GFP_NOFS);
@@ -484,7 +485,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered); /* Logic error */
- sums->bytenr = ((u64)bio->bi_sector << 9) +
+ sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
total_bytes;
index = 0;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 82d0342763c5..0165b8672f09 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -692,7 +692,10 @@ next:
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct btrfs_path *path, u64 start, u64 end,
- u64 *drop_end, int drop_cache)
+ u64 *drop_end, int drop_cache,
+ int replace_extent,
+ u32 extent_item_size,
+ int *key_inserted)
{
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
@@ -712,6 +715,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
int modify_tree = -1;
int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
int found = 0;
+ int leafs_visited = 0;
if (drop_cache)
btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -733,6 +737,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
path->slots[0]--;
}
ret = 0;
+ leafs_visited++;
next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -744,6 +749,7 @@ next_slot:
ret = 0;
break;
}
+ leafs_visited++;
leaf = path->nodes[0];
recow = 1;
}
@@ -766,7 +772,8 @@ next_slot:
btrfs_file_extent_num_bytes(leaf, fi);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = key.offset +
- btrfs_file_extent_inline_len(leaf, fi);
+ btrfs_file_extent_inline_len(leaf,
+ path->slots[0], fi);
} else {
WARN_ON(1);
extent_end = search_start;
@@ -927,14 +934,44 @@ next_slot:
}
if (!ret && del_nr > 0) {
+ /*
+ * Set path->slots[0] to first slot, so that after the delete
+ * if items are moved off our leaf to its immediate left or
+ * right neighbor leaves, we end up with a correct and adjusted
+ * path->slots[0] for our insertion.
+ */
+ path->slots[0] = del_slot;
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
if (ret)
btrfs_abort_transaction(trans, root, ret);
+
+ leaf = path->nodes[0];
+ /*
+ * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that
+ * is, its contents got pushed to its neighbors), in which case
+ * it means path->locks[0] == 0
+ */
+ if (!ret && replace_extent && leafs_visited == 1 &&
+ path->locks[0] &&
+ btrfs_leaf_free_space(root, leaf) >=
+ sizeof(struct btrfs_item) + extent_item_size) {
+
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+ setup_items_for_insert(root, path, &key,
+ &extent_item_size,
+ extent_item_size,
+ sizeof(struct btrfs_item) +
+ extent_item_size, 1);
+ *key_inserted = 1;
+ }
}
+ if (!replace_extent || !(*key_inserted))
+ btrfs_release_path(path);
if (drop_end)
*drop_end = found ? min(end, extent_end) : end;
- btrfs_release_path(path);
return ret;
}
@@ -949,7 +986,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
- drop_cache);
+ drop_cache, 0, 0, NULL);
btrfs_free_path(path);
return ret;
}
@@ -1235,29 +1272,18 @@ static int prepare_uptodate_page(struct page *page, u64 pos,
}
/*
- * this gets pages into the page cache and locks them down, it also properly
- * waits for data=ordered extents to finish before allowing the pages to be
- * modified.
+ * this just gets pages into the page cache and locks them down.
*/
-static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
- struct page **pages, size_t num_pages,
- loff_t pos, unsigned long first_index,
- size_t write_bytes, bool force_uptodate)
+static noinline int prepare_pages(struct inode *inode, struct page **pages,
+ size_t num_pages, loff_t pos,
+ size_t write_bytes, bool force_uptodate)
{
- struct extent_state *cached_state = NULL;
int i;
unsigned long index = pos >> PAGE_CACHE_SHIFT;
- struct inode *inode = file_inode(file);
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
int err = 0;
- int faili = 0;
- u64 start_pos;
- u64 last_pos;
-
- start_pos = pos & ~((u64)root->sectorsize - 1);
- last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
+ int faili;
-again:
for (i = 0; i < num_pages; i++) {
pages[i] = find_or_create_page(inode->i_mapping, index + i,
mask | __GFP_WRITE);
@@ -1280,57 +1306,85 @@ again:
}
wait_on_page_writeback(pages[i]);
}
- faili = num_pages - 1;
- err = 0;
+
+ return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
+}
+
+/*
+ * If needed, this function locks the extent and waits for data=ordered
+ * extents to finish before allowing the pages to be modified.
+ *
+ * Return value:
+ * 1 - the extent is locked
+ * 0 - the extent is not locked, and everything is OK
+ * -EAGAIN - the pages were released and must be prepared again
+ * any other negative value - an error occurred
+ */
+static noinline int
+lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
+ size_t num_pages, loff_t pos,
+ u64 *lockstart, u64 *lockend,
+ struct extent_state **cached_state)
+{
+ u64 start_pos;
+ u64 last_pos;
+ int i;
+ int ret = 0;
+
+ start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
+ last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos - 1, 0, &cached_state);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- last_pos - 1);
+ start_pos, last_pos, 0, cached_state);
+ ordered = btrfs_lookup_first_ordered_extent(inode, last_pos);
if (ordered &&
ordered->file_offset + ordered->len > start_pos &&
- ordered->file_offset < last_pos) {
+ ordered->file_offset <= last_pos) {
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos - 1,
- &cached_state, GFP_NOFS);
+ start_pos, last_pos,
+ cached_state, GFP_NOFS);
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
- err = btrfs_wait_ordered_range(inode, start_pos,
- last_pos - start_pos);
- if (err)
- goto fail;
- goto again;
+ ret = btrfs_wait_ordered_range(inode, start_pos,
+ last_pos - start_pos + 1);
+ if (ret)
+ return ret;
+ else
+ return -EAGAIN;
}
if (ordered)
btrfs_put_ordered_extent(ordered);
clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
- last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
+ last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, &cached_state, GFP_NOFS);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos - 1, &cached_state,
- GFP_NOFS);
+ 0, 0, cached_state, GFP_NOFS);
+ *lockstart = start_pos;
+ *lockend = last_pos;
+ ret = 1;
}
+
for (i = 0; i < num_pages; i++) {
if (clear_page_dirty_for_io(pages[i]))
account_page_redirty(pages[i]);
set_page_extent_mapped(pages[i]);
WARN_ON(!PageLocked(pages[i]));
}
- return 0;
-fail:
- while (faili >= 0) {
- unlock_page(pages[faili]);
- page_cache_release(pages[faili]);
- faili--;
- }
- return err;
+ return ret;
}
static noinline int check_can_nocow(struct inode *inode, loff_t pos,
@@ -1381,13 +1435,17 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
+ struct extent_state *cached_state = NULL;
u64 release_bytes = 0;
+ u64 lockstart;
+ u64 lockend;
unsigned long first_index;
size_t num_written = 0;
int nrptrs;
int ret = 0;
bool only_release_metadata = false;
bool force_page_uptodate = false;
+ bool need_unlock;
nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1456,18 +1514,31 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
release_bytes = reserve_bytes;
-
+ need_unlock = false;
+again:
/*
* This is going to setup the pages array with the number of
* pages we want, so we don't really need to worry about the
* contents of pages from loop to loop
*/
- ret = prepare_pages(root, file, pages, num_pages,
- pos, first_index, write_bytes,
+ ret = prepare_pages(inode, pages, num_pages,
+ pos, write_bytes,
force_page_uptodate);
if (ret)
break;
+ ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
+ pos, &lockstart, &lockend,
+ &cached_state);
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ goto again;
+ break;
+ } else if (ret > 0) {
+ need_unlock = true;
+ ret = 0;
+ }
+
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, i);
@@ -1512,19 +1583,21 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
- if (copied > 0) {
+
+ if (copied > 0)
ret = btrfs_dirty_pages(root, inode, pages,
dirty_pages, pos, copied,
NULL);
- if (ret) {
- btrfs_drop_pages(pages, num_pages);
- break;
- }
+ if (need_unlock)
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, &cached_state,
+ GFP_NOFS);
+ if (ret) {
+ btrfs_drop_pages(pages, num_pages);
+ break;
}
release_bytes = 0;
- btrfs_drop_pages(pages, num_pages);
-
if (only_release_metadata && copied > 0) {
u64 lockstart = round_down(pos, root->sectorsize);
u64 lockend = lockstart +
@@ -1536,6 +1609,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
only_release_metadata = false;
}
+ btrfs_drop_pages(pages, num_pages);
+
cond_resched();
balance_dirty_pages_ratelimited(inode->i_mapping);
@@ -1857,12 +1932,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (file->private_data)
btrfs_ioctl_trans_end(file);
+ /*
+ * We use start here because we will need to wait on the IO to complete
+ * in btrfs_sync_log, which could require joining a transaction (for
+ * example checking cross references in the nocow path). If we use join
+ * here we could get into a situation where we're waiting on IO to
+ * happen that is blocked on a transaction trying to commit. With start
+ * we inc the extwriter counter, so we wait for all extwriters to exit
+ * before we start blocking join'ers. This comment is to keep somebody
+ * from thinking they are super smart and changing this to
+ * btrfs_join_transaction *cough*Josef*cough*.
+ */
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
mutex_unlock(&inode->i_mutex);
goto out;
}
+ trans->sync = true;
ret = btrfs_log_dentry_safe(trans, root, dentry);
if (ret < 0) {
@@ -1963,11 +2050,13 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
struct btrfs_key key;
int ret;
+ if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
+ goto out;
+
key.objectid = btrfs_ino(inode);
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = offset;
-
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
return ret;
@@ -2064,8 +2153,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
u64 drop_end;
int ret = 0;
int err = 0;
+ int rsv_count;
bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
((offset + len - 1) >> PAGE_CACHE_SHIFT));
+ bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
ret = btrfs_wait_ordered_range(inode, offset, len);
if (ret)
@@ -2125,7 +2216,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
* we need to try again.
*/
if ((!ordered ||
- (ordered->file_offset + ordered->len < lockstart ||
+ (ordered->file_offset + ordered->len <= lockstart ||
ordered->file_offset > lockend)) &&
!test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_UPTODATE, 0,
@@ -2163,9 +2254,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
/*
* 1 - update the inode
* 1 - removing the extents in the range
- * 1 - adding the hole extent
+ * 1 - adding the hole extent if no_holes isn't set
*/
- trans = btrfs_start_transaction(root, 3);
+ rsv_count = no_holes ? 2 : 3;
+ trans = btrfs_start_transaction(root, rsv_count);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out_free;
@@ -2179,7 +2271,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
while (cur_offset < lockend) {
ret = __btrfs_drop_extents(trans, root, inode, path,
cur_offset, lockend + 1,
- &drop_end, 1);
+ &drop_end, 1, 0, 0, NULL);
if (ret != -ENOSPC)
break;
@@ -2202,7 +2294,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
- trans = btrfs_start_transaction(root, 3);
+ trans = btrfs_start_transaction(root, rsv_count);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 057be95b1e1e..73f3de7a083c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -347,8 +347,8 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
btrfs_readpage(NULL, page);
lock_page(page);
if (!PageUptodate(page)) {
- printk(KERN_ERR "btrfs: error reading free "
- "space cache\n");
+ btrfs_err(BTRFS_I(inode)->root->fs_info,
+ "error reading free space cache");
io_ctl_drop_pages(io_ctl);
return -EIO;
}
@@ -405,7 +405,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
gen = io_ctl->cur;
if (le64_to_cpu(*gen) != generation) {
- printk_ratelimited(KERN_ERR "btrfs: space cache generation "
+ printk_ratelimited(KERN_ERR "BTRFS: space cache generation "
"(%Lu) does not match inode (%Lu)\n", *gen,
generation);
io_ctl_unmap_page(io_ctl);
@@ -463,7 +463,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
PAGE_CACHE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
if (val != crc) {
- printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
+ printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free "
"space cache\n");
io_ctl_unmap_page(io_ctl);
return -EIO;
@@ -1902,7 +1902,7 @@ out:
spin_unlock(&ctl->tree_lock);
if (ret) {
- printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
+ printk(KERN_CRIT "BTRFS: unable to add free space :%d\n", ret);
ASSERT(ret != -EEXIST);
}
@@ -2011,14 +2011,15 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes && !block_group->ro)
count++;
- printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
- info->offset, info->bytes,
+ btrfs_crit(block_group->fs_info,
+ "entry offset %llu, bytes %llu, bitmap %s",
+ info->offset, info->bytes,
(info->bitmap) ? "yes" : "no");
}
- printk(KERN_INFO "block group has cluster?: %s\n",
+ btrfs_info(block_group->fs_info, "block group has cluster?: %s",
list_empty(&block_group->cluster_list) ? "no" : "yes");
- printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
- "\n", count);
+ btrfs_info(block_group->fs_info,
+ "%d blocks of free space at or bigger than bytes is", count);
}
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
@@ -2421,7 +2422,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *entry = NULL;
struct btrfs_free_space *last;
struct rb_node *node;
- u64 window_start;
u64 window_free;
u64 max_extent;
u64 total_size = 0;
@@ -2443,7 +2443,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
entry = rb_entry(node, struct btrfs_free_space, offset_index);
}
- window_start = entry->offset;
window_free = entry->bytes;
max_extent = entry->bytes;
first = entry;
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
new file mode 100644
index 000000000000..85889aa82c62
--- /dev/null
+++ b/fs/btrfs/hash.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <crypto/hash.h>
+#include <linux/err.h>
+#include "hash.h"
+
+static struct crypto_shash *tfm;
+
+int __init btrfs_hash_init(void)
+{
+ tfm = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ return 0;
+}
+
+void btrfs_hash_exit(void)
+{
+ crypto_free_shash(tfm);
+}
+
+u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
+{
+ struct {
+ struct shash_desc shash;
+ char ctx[crypto_shash_descsize(tfm)];
+ } desc;
+ int err;
+
+ desc.shash.tfm = tfm;
+ desc.shash.flags = 0;
+ *(u32 *)desc.ctx = crc;
+
+ err = crypto_shash_update(&desc.shash, address, length);
+ BUG_ON(err);
+
+ return *(u32 *)desc.ctx;
+}
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 1d982812ab67..118a2316e5d3 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -19,10 +19,15 @@
#ifndef __HASH__
#define __HASH__
-#include <linux/crc32c.h>
+int __init btrfs_hash_init(void);
+
+void btrfs_hash_exit(void);
+
+u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
+
static inline u64 btrfs_name_hash(const char *name, int len)
{
- return crc32c((u32)~1, name, len);
+ return btrfs_crc32c((u32)~1, name, len);
}
/*
@@ -31,7 +36,7 @@ static inline u64 btrfs_name_hash(const char *name, int len)
static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
int len)
{
- return (u64) crc32c(parent_objectid, name, len);
+ return (u64) btrfs_crc32c(parent_objectid, name, len);
}
#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index ec82fae07097..2be38df703c9 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -91,32 +91,6 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
return 0;
}
-static struct btrfs_inode_ref *
-btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, int ins_len,
- int cow)
-{
- int ret;
- struct btrfs_key key;
- struct btrfs_inode_ref *ref;
-
- key.objectid = inode_objectid;
- key.type = BTRFS_INODE_REF_KEY;
- key.offset = ref_objectid;
-
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return NULL;
- if (!find_name_in_backref(path, name, name_len, &ref))
- return NULL;
- return ref;
-}
-
/* Returns NULL if no extref found */
struct btrfs_inode_extref *
btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
@@ -144,45 +118,6 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
return extref;
}
-int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, int mod,
- u64 *ret_index)
-{
- struct btrfs_inode_ref *ref;
- struct btrfs_inode_extref *extref;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
-
- ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len,
- inode_objectid, ref_objectid, ins_len,
- cow);
- if (IS_ERR(ref))
- return PTR_ERR(ref);
-
- if (ref != NULL) {
- *ret_index = btrfs_inode_ref_index(path->nodes[0], ref);
- return 0;
- }
-
- btrfs_release_path(path);
-
- extref = btrfs_lookup_inode_extref(trans, root, path, name,
- name_len, inode_objectid,
- ref_objectid, ins_len, cow);
- if (IS_ERR(extref))
- return PTR_ERR(extref);
-
- if (extref) {
- *ret_index = btrfs_inode_extref_index(path->nodes[0], extref);
- return 0;
- }
-
- return -ENOENT;
-}
-
static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f1a77449d032..d3d44486290b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -58,9 +58,10 @@
#include "inode-map.h"
#include "backref.h"
#include "hash.h"
+#include "props.h"
struct btrfs_iget_args {
- u64 ino;
+ struct btrfs_key *location;
struct btrfs_root *root;
};
@@ -125,13 +126,12 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
* no overlapping inline items exist in the btree
*/
static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path, int extent_inserted,
struct btrfs_root *root, struct inode *inode,
u64 start, size_t size, size_t compressed_size,
int compress_type,
struct page **compressed_pages)
{
- struct btrfs_key key;
- struct btrfs_path *path;
struct extent_buffer *leaf;
struct page *page = NULL;
char *kaddr;
@@ -140,29 +140,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
int err = 0;
int ret;
size_t cur_size = size;
- size_t datasize;
unsigned long offset;
if (compressed_size && compressed_pages)
cur_size = compressed_size;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ inode_add_bytes(inode, size);
- path->leave_spinning = 1;
+ if (!extent_inserted) {
+ struct btrfs_key key;
+ size_t datasize;
- key.objectid = btrfs_ino(inode);
- key.offset = start;
- btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
- datasize = btrfs_file_extent_calc_inline_size(cur_size);
+ key.objectid = btrfs_ino(inode);
+ key.offset = start;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
- inode_add_bytes(inode, size);
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- datasize);
- if (ret) {
- err = ret;
- goto fail;
+ datasize = btrfs_file_extent_calc_inline_size(cur_size);
+ path->leave_spinning = 1;
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ datasize);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -203,7 +203,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
page_cache_release(page);
}
btrfs_mark_buffer_dirty(leaf);
- btrfs_free_path(path);
+ btrfs_release_path(path);
/*
* we're an inline extent, so nobody can
@@ -219,7 +219,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
return ret;
fail:
- btrfs_free_path(path);
return err;
}
@@ -242,6 +241,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
u64 aligned_end = ALIGN(end, root->sectorsize);
u64 data_len = inline_len;
int ret;
+ struct btrfs_path *path;
+ int extent_inserted = 0;
+ u32 extent_item_size;
if (compressed_size)
data_len = compressed_size;
@@ -256,12 +258,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
return 1;
}
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
return PTR_ERR(trans);
+ }
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
+ if (compressed_size && compressed_pages)
+ extent_item_size = btrfs_file_extent_calc_inline_size(
+ compressed_size);
+ else
+ extent_item_size = btrfs_file_extent_calc_inline_size(
+ inline_len);
+
+ ret = __btrfs_drop_extents(trans, root, inode, path,
+ start, aligned_end, NULL,
+ 1, 1, extent_item_size, &extent_inserted);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out;
@@ -269,7 +286,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
- ret = insert_inline_extent(trans, root, inode, start,
+ ret = insert_inline_extent(trans, path, extent_inserted,
+ root, inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
if (ret && ret != -ENOSPC) {
@@ -284,6 +302,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
+ btrfs_free_path(path);
btrfs_end_transaction(trans, root);
return ret;
}
@@ -1262,7 +1281,8 @@ next_slot:
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
- btrfs_file_extent_inline_len(leaf, fi);
+ btrfs_file_extent_inline_len(leaf,
+ path->slots[0], fi);
extent_end = ALIGN(extent_end, root->sectorsize);
} else {
BUG_ON(1);
@@ -1577,7 +1597,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
unsigned long bio_flags)
{
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- u64 logical = (u64)bio->bi_sector << 9;
+ u64 logical = (u64)bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
int ret;
@@ -1585,7 +1605,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
if (bio_flags & EXTENT_BIO_COMPRESSED)
return 0;
- length = bio->bi_size;
+ length = bio->bi_iter.bi_size;
map_length = length;
ret = btrfs_map_block(root->fs_info, rw, logical,
&map_length, NULL, 0);
@@ -1841,14 +1861,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
+ int extent_inserted = 0;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->leave_spinning = 1;
-
/*
* we may be replacing one extent in the tree with another.
* The new extent is pinned in the extent map, and we don't want
@@ -1858,17 +1877,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* the caller is expected to unpin it and allow it to be merged
* with the others.
*/
- ret = btrfs_drop_extents(trans, root, inode, file_pos,
- file_pos + num_bytes, 0);
+ ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
+ file_pos + num_bytes, NULL, 0,
+ 1, sizeof(*fi), &extent_inserted);
if (ret)
goto out;
- ins.objectid = btrfs_ino(inode);
- ins.offset = file_pos;
- ins.type = BTRFS_EXTENT_DATA_KEY;
- ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
- if (ret)
- goto out;
+ if (!extent_inserted) {
+ ins.objectid = btrfs_ino(inode);
+ ins.offset = file_pos;
+ ins.type = BTRFS_EXTENT_DATA_KEY;
+
+ path->leave_spinning = 1;
+ ret = btrfs_insert_empty_item(trans, root, path, &ins,
+ sizeof(*fi));
+ if (ret)
+ goto out;
+ }
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -2290,7 +2315,7 @@ again:
u64 extent_len;
struct btrfs_key found_key;
- ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
goto out_free_path;
@@ -2543,12 +2568,6 @@ out_kfree:
return NULL;
}
-/*
- * helper function for btrfs_finish_ordered_io, this
- * just reads in some of the csum leaves to prime them into ram
- * before we start the transaction. It limits the amount of btree
- * reads required while inside the transaction.
- */
/* as ordered data IO finishes, this gets called so we can finish
* an ordered extent if the range of bytes in the file it covers are
* fully written.
@@ -2610,7 +2629,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
EXTENT_DEFRAG, 1, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
- if (last_snapshot >= BTRFS_I(inode)->generation)
+ if (0 && last_snapshot >= BTRFS_I(inode)->generation)
/* the inode is shared */
new = record_old_file_extents(inode, ordered_extent);
@@ -3248,7 +3267,8 @@ out:
* slot is the slot the inode is in, objectid is the objectid of the inode
*/
static noinline int acls_after_inode_item(struct extent_buffer *leaf,
- int slot, u64 objectid)
+ int slot, u64 objectid,
+ int *first_xattr_slot)
{
u32 nritems = btrfs_header_nritems(leaf);
struct btrfs_key found_key;
@@ -3264,6 +3284,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
}
slot++;
+ *first_xattr_slot = -1;
while (slot < nritems) {
btrfs_item_key_to_cpu(leaf, &found_key, slot);
@@ -3273,6 +3294,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
/* we found an xattr, assume we've got an acl */
if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
+ if (*first_xattr_slot == -1)
+ *first_xattr_slot = slot;
if (found_key.offset == xattr_access ||
found_key.offset == xattr_default)
return 1;
@@ -3301,6 +3324,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
* something larger than an xattr. We have to assume the inode
* has acls
*/
+ if (*first_xattr_slot == -1)
+ *first_xattr_slot = slot;
return 1;
}
@@ -3315,10 +3340,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
struct btrfs_timespec *tspec;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location;
+ unsigned long ptr;
int maybe_acls;
u32 rdev;
int ret;
bool filled = false;
+ int first_xattr_slot;
ret = btrfs_fill_inode(inode, &rdev);
if (!ret)
@@ -3328,7 +3355,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
if (!path)
goto make_bad;
- path->leave_spinning = 1;
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -3338,7 +3364,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
leaf = path->nodes[0];
if (filled)
- goto cache_acl;
+ goto cache_index;
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
@@ -3381,18 +3407,51 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
+ path->slots[0]++;
+ if (inode->i_nlink != 1 ||
+ path->slots[0] >= btrfs_header_nritems(leaf))
+ goto cache_acl;
+
+ btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
+ if (location.objectid != btrfs_ino(inode))
+ goto cache_acl;
+
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ if (location.type == BTRFS_INODE_REF_KEY) {
+ struct btrfs_inode_ref *ref;
+
+ ref = (struct btrfs_inode_ref *)ptr;
+ BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
+ } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
+ struct btrfs_inode_extref *extref;
+
+ extref = (struct btrfs_inode_extref *)ptr;
+ BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
+ extref);
+ }
cache_acl:
/*
* try to precache a NULL acl entry for files that don't have
* any xattrs or acls
*/
maybe_acls = acls_after_inode_item(leaf, path->slots[0],
- btrfs_ino(inode));
+ btrfs_ino(inode), &first_xattr_slot);
+ if (first_xattr_slot != -1) {
+ path->slots[0] = first_xattr_slot;
+ ret = btrfs_load_inode_props(inode, path);
+ if (ret)
+ btrfs_err(root->fs_info,
+ "error loading props for ino %llu (root %llu): %d\n",
+ btrfs_ino(inode),
+ root->root_key.objectid, ret);
+ }
+ btrfs_free_path(path);
+
if (!maybe_acls)
cache_no_acl(inode);
- btrfs_free_path(path);
-
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_mapping->a_ops = &btrfs_aops;
@@ -3496,7 +3555,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
goto failed;
}
- btrfs_unlock_up_safe(path, 1);
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
@@ -3593,6 +3651,24 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto err;
btrfs_release_path(path);
+ /*
+ * If we don't have the dir index, we have to get it by looking up
+ * the inode ref. Since we then have the inode ref at hand, we remove
+ * it directly; delayed deletion is unnecessary in that case.
+ *
+ * But if we have the dir index, we needn't search for the inode ref.
+ * Since the inode ref is close to the inode item, it is better
+ * to delay its deletion and perform it when we update the
+ * inode item.
+ */
+ if (BTRFS_I(inode)->dir_index) {
+ ret = btrfs_delayed_delete_inode_ref(inode);
+ if (!ret) {
+ index = BTRFS_I(inode)->dir_index;
+ goto skip_backref;
+ }
+ }
+
ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
dir_ino, &index);
if (ret) {
@@ -3602,7 +3678,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
goto err;
}
-
+skip_backref:
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -3948,7 +4024,7 @@ search_again:
btrfs_file_extent_num_bytes(leaf, fi);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
item_end += btrfs_file_extent_inline_len(leaf,
- fi);
+ path->slots[0], fi);
}
item_end--;
}
@@ -4018,6 +4094,12 @@ search_again:
inode_sub_bytes(inode, item_end + 1 -
new_size);
}
+
+ /*
+ * update the ram bytes to properly reflect
+ * the new size of our item
+ */
+ btrfs_set_file_extent_ram_bytes(leaf, fi, size);
size =
btrfs_file_extent_calc_inline_size(size);
btrfs_truncate_item(root, path, size, 1);
@@ -4203,6 +4285,49 @@ out:
return ret;
}
+static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
+ u64 offset, u64 len)
+{
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ /*
+ * Still need to make sure the inode looks like it's been updated so
+ * that any holes get logged if we fsync.
+ */
+ if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
+ BTRFS_I(inode)->last_trans = root->fs_info->generation;
+ BTRFS_I(inode)->last_sub_trans = root->log_transid;
+ BTRFS_I(inode)->last_log_commit = root->last_log_commit;
+ return 0;
+ }
+
+ /*
+ * 1 - for the one we're dropping
+ * 1 - for the one we're adding
+ * 1 - for updating the inode.
+ */
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ btrfs_end_transaction(trans, root);
+ return ret;
+ }
+
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+ 0, 0, len, 0, len, 0, 0, 0);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ else
+ btrfs_update_inode(trans, root, inode);
+ btrfs_end_transaction(trans, root);
+ return ret;
+}
+
/*
* This function puts in dummy file extents for the area we're creating a hole
* for. So if we are truncating this file to a larger size we need to insert
@@ -4211,7 +4336,6 @@ out:
*/
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_map *em = NULL;
@@ -4266,31 +4390,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
struct extent_map *hole_em;
hole_size = last_byte - cur_offset;
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- break;
- }
-
- err = btrfs_drop_extents(trans, root, inode,
- cur_offset,
- cur_offset + hole_size, 1);
- if (err) {
- btrfs_abort_transaction(trans, root, err);
- btrfs_end_transaction(trans, root);
- break;
- }
-
- err = btrfs_insert_file_extent(trans, root,
- btrfs_ino(inode), cur_offset, 0,
- 0, hole_size, 0, hole_size,
- 0, 0, 0);
- if (err) {
- btrfs_abort_transaction(trans, root, err);
- btrfs_end_transaction(trans, root);
+ err = maybe_insert_hole(root, inode, cur_offset,
+ hole_size);
+ if (err)
break;
- }
-
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
@@ -4309,7 +4412,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_em->ram_bytes = hole_size;
hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
- hole_em->generation = trans->transid;
+ hole_em->generation = root->fs_info->generation;
while (1) {
write_lock(&em_tree->lock);
@@ -4322,17 +4425,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_size - 1, 0);
}
free_extent_map(hole_em);
-next:
- btrfs_update_inode(trans, root, inode);
- btrfs_end_transaction(trans, root);
}
+next:
free_extent_map(em);
em = NULL;
cur_offset = last_byte;
if (cur_offset >= block_end)
break;
}
-
free_extent_map(em);
unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
GFP_NOFS);
@@ -4354,8 +4454,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
- inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+ if (newsize != oldsize) {
+ inode_inc_iversion(inode);
+ if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
+ inode->i_ctime = inode->i_mtime =
+ current_fs_time(inode->i_sb);
+ }
if (newsize > oldsize) {
truncate_pagecache(inode, newsize);
@@ -4464,12 +4568,70 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
err = btrfs_dirty_inode(inode);
if (!err && attr->ia_valid & ATTR_MODE)
- err = btrfs_acl_chmod(inode);
+ err = posix_acl_chmod(inode, inode->i_mode);
}
return err;
}
+/*
+ * While truncating the inode pages during eviction, we get the VFS calling
+ * btrfs_invalidatepage() against each page of the inode. This is slow because
+ * the calls to btrfs_invalidatepage() result in a huge amount of calls to
+ * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
+ * extent_state structures over and over, wasting lots of time.
+ *
+ * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
+ * those expensive operations on a per page basis and do only the ordered io
+ * finishing, while we release here the extent_map and extent_state structures,
+ * without the excessive merging and splitting.
+ */
+static void evict_inode_truncate_pages(struct inode *inode)
+{
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
+ struct rb_node *node;
+
+ ASSERT(inode->i_state & I_FREEING);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ write_lock(&map_tree->lock);
+ while (!RB_EMPTY_ROOT(&map_tree->map)) {
+ struct extent_map *em;
+
+ node = rb_first(&map_tree->map);
+ em = rb_entry(node, struct extent_map, rb_node);
+ clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+ clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
+ remove_extent_mapping(map_tree, em);
+ free_extent_map(em);
+ }
+ write_unlock(&map_tree->lock);
+
+ spin_lock(&io_tree->lock);
+ while (!RB_EMPTY_ROOT(&io_tree->state)) {
+ struct extent_state *state;
+ struct extent_state *cached_state = NULL;
+
+ node = rb_first(&io_tree->state);
+ state = rb_entry(node, struct extent_state, rb_node);
+ atomic_inc(&state->refs);
+ spin_unlock(&io_tree->lock);
+
+ lock_extent_bits(io_tree, state->start, state->end,
+ 0, &cached_state);
+ clear_extent_bit(io_tree, state->start, state->end,
+ EXTENT_LOCKED | EXTENT_DIRTY |
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 1,
+ &cached_state, GFP_NOFS);
+ free_extent_state(state);
+
+ spin_lock(&io_tree->lock);
+ }
+ spin_unlock(&io_tree->lock);
+}
+
void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_trans_handle *trans;
@@ -4480,7 +4642,8 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
- truncate_inode_pages(&inode->i_data, 0);
+ evict_inode_truncate_pages(inode);
+
if (inode->i_nlink &&
((btrfs_root_refs(&root->root_item) != 0 &&
root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@ -4655,9 +4818,9 @@ static int fixup_tree_root_location(struct btrfs_root *root,
}
err = -ENOENT;
- ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
- BTRFS_I(dir)->root->root_key.objectid,
- location->objectid);
+ ret = btrfs_find_item(root->fs_info->tree_root, path,
+ BTRFS_I(dir)->root->root_key.objectid,
+ location->objectid, BTRFS_ROOT_REF_KEY, NULL);
if (ret) {
if (ret < 0)
err = ret;
@@ -4818,7 +4981,9 @@ again:
static int btrfs_init_locked_inode(struct inode *inode, void *p)
{
struct btrfs_iget_args *args = p;
- inode->i_ino = args->ino;
+ inode->i_ino = args->location->objectid;
+ memcpy(&BTRFS_I(inode)->location, args->location,
+ sizeof(*args->location));
BTRFS_I(inode)->root = args->root;
return 0;
}
@@ -4826,19 +4991,19 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
static int btrfs_find_actor(struct inode *inode, void *opaque)
{
struct btrfs_iget_args *args = opaque;
- return args->ino == btrfs_ino(inode) &&
+ return args->location->objectid == BTRFS_I(inode)->location.objectid &&
args->root == BTRFS_I(inode)->root;
}
static struct inode *btrfs_iget_locked(struct super_block *s,
- u64 objectid,
+ struct btrfs_key *location,
struct btrfs_root *root)
{
struct inode *inode;
struct btrfs_iget_args args;
- unsigned long hashval = btrfs_inode_hash(objectid, root);
+ unsigned long hashval = btrfs_inode_hash(location->objectid, root);
- args.ino = objectid;
+ args.location = location;
args.root = root;
inode = iget5_locked(s, hashval, btrfs_find_actor,
@@ -4855,13 +5020,11 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
{
struct inode *inode;
- inode = btrfs_iget_locked(s, location->objectid, root);
+ inode = btrfs_iget_locked(s, location, root);
if (!inode)
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- BTRFS_I(inode)->root = root;
- memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
btrfs_read_locked_inode(inode);
if (!is_bad_inode(inode)) {
inode_tree_add(inode);
@@ -4917,7 +5080,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return ERR_PTR(ret);
if (location.objectid == 0)
- return NULL;
+ return ERR_PTR(-ENOENT);
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
@@ -4981,10 +5144,17 @@ static void btrfs_dentry_release(struct dentry *dentry)
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
- struct dentry *ret;
+ struct inode *inode;
- ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
- return ret;
+ inode = btrfs_lookup_dentry(dir, dentry);
+ if (IS_ERR(inode)) {
+ if (PTR_ERR(inode) == -ENOENT)
+ inode = NULL;
+ else
+ return ERR_CAST(inode);
+ }
+
+ return d_materialise_unique(dentry, inode);
}
unsigned char btrfs_filetype_table[] = {
@@ -5354,7 +5524,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
u32 sizes[2];
unsigned long ptr;
int ret;
- int owner;
path = btrfs_alloc_path();
if (!path)
@@ -5388,6 +5557,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
* number
*/
BTRFS_I(inode)->index_cnt = 2;
+ BTRFS_I(inode)->dir_index = *index;
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
@@ -5400,11 +5570,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
*/
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
- if (S_ISDIR(mode))
- owner = 0;
- else
- owner = 1;
-
key[0].objectid = objectid;
btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
key[0].offset = 0;
@@ -5469,6 +5634,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_update_root_times(trans, root);
+ ret = btrfs_inode_inherit_props(trans, inode, dir);
+ if (ret)
+ btrfs_err(root->fs_info,
+ "error inheriting props for ino %llu (root %llu): %d",
+ btrfs_ino(inode), root->root_key.objectid, ret);
+
return inode;
fail:
if (dir)
@@ -5737,6 +5908,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
+ /* There are several dir indexes for this inode, clear the cache. */
+ BTRFS_I(inode)->dir_index = 0ULL;
inc_nlink(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
@@ -6000,7 +6173,7 @@ again:
btrfs_file_extent_num_bytes(leaf, item);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
- size = btrfs_file_extent_inline_len(leaf, item);
+ size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
extent_end = ALIGN(extent_start + size, root->sectorsize);
}
next:
@@ -6069,7 +6242,7 @@ next:
goto out;
}
- size = btrfs_file_extent_inline_len(leaf, item);
+ size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
extent_offset = page_offset(page) + pg_offset - extent_start;
copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
size - extent_offset);
@@ -6386,6 +6559,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
int slot;
int found_type;
bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -6429,6 +6603,10 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
goto out;
+ extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+ if (extent_end <= offset)
+ goto out;
+
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (disk_bytenr == 0)
goto out;
@@ -6446,8 +6624,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
}
- extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
-
if (btrfs_extent_readonly(root, disk_bytenr))
goto out;
btrfs_release_path(path);
@@ -6779,17 +6955,16 @@ unlock_err:
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
struct btrfs_dio_private *dip = bio->bi_private;
- struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct bio *dio_bio;
u32 *csums = (u32 *)dip->csum;
- int index = 0;
u64 start;
+ int i;
start = dip->logical_offset;
- do {
+ bio_for_each_segment_all(bvec, bio, i) {
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
struct page *page = bvec->bv_page;
char *kaddr;
@@ -6805,18 +6980,16 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
local_irq_restore(flags);
flush_dcache_page(bvec->bv_page);
- if (csum != csums[index]) {
+ if (csum != csums[i]) {
btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
btrfs_ino(inode), start, csum,
- csums[index]);
+ csums[i]);
err = -EIO;
}
}
start += bvec->bv_len;
- bvec++;
- index++;
- } while (bvec <= bvec_end);
+ }
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
@@ -6894,10 +7067,11 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
struct btrfs_dio_private *dip = bio->bi_private;
if (err) {
- printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
- "sector %#Lx len %u err no %d\n",
+ btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
+ "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
btrfs_ino(dip->inode), bio->bi_rw,
- (unsigned long long)bio->bi_sector, bio->bi_size, err);
+ (unsigned long long)bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size, err);
dip->errors = 1;
/*
@@ -6988,7 +7162,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
struct bio *bio;
struct bio *orig_bio = dip->orig_bio;
struct bio_vec *bvec = orig_bio->bi_io_vec;
- u64 start_sector = orig_bio->bi_sector;
+ u64 start_sector = orig_bio->bi_iter.bi_sector;
u64 file_offset = dip->logical_offset;
u64 submit_len = 0;
u64 map_length;
@@ -6996,7 +7170,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int ret = 0;
int async_submit = 0;
- map_length = orig_bio->bi_size;
+ map_length = orig_bio->bi_iter.bi_size;
ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
&map_length, NULL, 0);
if (ret) {
@@ -7004,7 +7178,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
return -EIO;
}
- if (map_length >= orig_bio->bi_size) {
+ if (map_length >= orig_bio->bi_iter.bi_size) {
bio = orig_bio;
goto submit;
}
@@ -7056,7 +7230,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
- map_length = orig_bio->bi_size;
+ map_length = orig_bio->bi_iter.bi_size;
ret = btrfs_map_block(root->fs_info, rw,
start_sector << 9,
&map_length, NULL, 0);
@@ -7114,7 +7288,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
if (!skip_sum && !write) {
csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+ sum_len = dio_bio->bi_iter.bi_size >>
+ inode->i_sb->s_blocksize_bits;
sum_len *= csum_size;
} else {
sum_len = 0;
@@ -7129,8 +7304,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dip->private = dio_bio->bi_private;
dip->inode = inode;
dip->logical_offset = file_offset;
- dip->bytes = dio_bio->bi_size;
- dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
+ dip->bytes = dio_bio->bi_iter.bi_size;
+ dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
io_bio->bi_private = dip;
dip->errors = 0;
dip->orig_bio = io_bio;
@@ -7367,6 +7542,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
struct extent_state *cached_state = NULL;
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+ int inode_evicting = inode->i_state & I_FREEING;
/*
* we have the page locked, so new writeback can't start,
@@ -7382,17 +7558,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
btrfs_releasepage(page, GFP_NOFS);
return;
}
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
- ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
+
+ if (!inode_evicting)
+ lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+ ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
/*
* IO on this page will never be started, so we need
* to account for any ordered extents now
*/
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
+ if (!inode_evicting)
+ clear_extent_bit(tree, page_start, page_end,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 0, &cached_state,
+ GFP_NOFS);
/*
* whoever cleared the private bit is responsible
* for the finish_ordered_io
@@ -7416,14 +7596,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
btrfs_finish_ordered_io(ordered);
}
btrfs_put_ordered_extent(ordered);
- cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+ if (!inode_evicting) {
+ cached_state = NULL;
+ lock_extent_bits(tree, page_start, page_end, 0,
+ &cached_state);
+ }
+ }
+
+ if (!inode_evicting) {
+ clear_extent_bit(tree, page_start, page_end,
+ EXTENT_LOCKED | EXTENT_DIRTY |
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 1,
+ &cached_state, GFP_NOFS);
+
+ __btrfs_releasepage(page, GFP_NOFS);
}
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
- &cached_state, GFP_NOFS);
- __btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
if (PagePrivate(page)) {
@@ -7733,7 +7921,9 @@ out:
* create a new subvolume directory/inode (helper for the ioctl).
*/
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, u64 new_dirid)
+ struct btrfs_root *new_root,
+ struct btrfs_root *parent_root,
+ u64 new_dirid)
{
struct inode *inode;
int err;
@@ -7751,6 +7941,12 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
set_nlink(inode, 1);
btrfs_i_size_write(inode, 0);
+ err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
+ if (err)
+ btrfs_err(new_root->fs_info,
+ "error inheriting subvolume %llu properties: %d\n",
+ new_root->root_key.objectid, err);
+
err = btrfs_update_inode(trans, new_root, inode);
iput(inode);
@@ -7776,6 +7972,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->flags = 0;
ei->csum_bytes = 0;
ei->index_cnt = (u64)-1;
+ ei->dir_index = 0;
ei->last_unlink_trans = 0;
ei->last_log_commit = 0;
@@ -8063,6 +8260,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret)
goto out_fail;
+ BTRFS_I(old_inode)->dir_index = 0ULL;
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
/* force full log commit if subvolume involved. */
root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -8151,6 +8349,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_fail;
}
+ if (old_inode->i_nlink == 1)
+ BTRFS_I(old_inode)->dir_index = index;
+
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
struct dentry *parent = new_dentry->d_parent;
btrfs_log_new_name(trans, old_inode, old_dir, parent);
@@ -8286,7 +8487,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
int ret;
- if (root->fs_info->sb->s_flags & MS_RDONLY)
+ if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return -EROFS;
ret = __start_delalloc_inodes(root, delay_iput);
@@ -8312,7 +8513,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
struct list_head splice;
int ret;
- if (fs_info->sb->s_flags & MS_RDONLY)
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
INIT_LIST_HEAD(&splice);
@@ -8649,12 +8850,14 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
@@ -8724,6 +8927,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
.permission = btrfs_permission,
.fiemap = btrfs_fiemap,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
static const struct inode_operations btrfs_special_inode_operations = {
@@ -8735,6 +8939,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
static const struct inode_operations btrfs_symlink_inode_operations = {
@@ -8748,7 +8953,6 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
- .get_acl = btrfs_get_acl,
.update_time = btrfs_update_time,
};
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 21da5762b0b1..a6d8efa46bfe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -56,6 +56,8 @@
#include "rcu-string.h"
#include "send.h"
#include "dev-replace.h"
+#include "props.h"
+#include "sysfs.h"
static int btrfs_clone(struct inode *src, struct inode *inode,
u64 off, u64 olen, u64 olen_aligned, u64 destoff);
@@ -190,6 +192,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int i_oldflags;
umode_t mode;
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
if (btrfs_root_readonly(root))
return -EROFS;
@@ -200,9 +205,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (ret)
return ret;
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
ret = mnt_want_write_file(file);
if (ret)
return ret;
@@ -280,9 +282,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (flags & FS_NOCOMP_FL) {
ip->flags &= ~BTRFS_INODE_COMPRESS;
ip->flags |= BTRFS_INODE_NOCOMPRESS;
+
+ ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
+ if (ret && ret != -ENODATA)
+ goto out_drop;
} else if (flags & FS_COMPR_FL) {
+ const char *comp;
+
ip->flags |= BTRFS_INODE_COMPRESS;
ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+
+ if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
+ comp = "lzo";
+ else
+ comp = "zlib";
+ ret = btrfs_set_prop(inode, "btrfs.compression",
+ comp, strlen(comp), 0);
+ if (ret)
+ goto out_drop;
+
} else {
ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
@@ -392,6 +410,7 @@ static noinline int create_subvol(struct inode *dir,
struct btrfs_root *new_root;
struct btrfs_block_rsv block_rsv;
struct timespec cur_time = CURRENT_TIME;
+ struct inode *inode;
int ret;
int err;
u64 objectid;
@@ -417,7 +436,9 @@ static noinline int create_subvol(struct inode *dir,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- goto out;
+ btrfs_subvolume_release_metadata(root, &block_rsv,
+ qgroup_reserved);
+ return ret;
}
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
@@ -500,7 +521,7 @@ static noinline int create_subvol(struct inode *dir,
btrfs_record_root_in_trans(trans, new_root);
- ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
+ ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
if (ret) {
/* We potentially lose an unused inode item here */
btrfs_abort_transaction(trans, root, ret);
@@ -542,6 +563,8 @@ static noinline int create_subvol(struct inode *dir,
fail:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
+ btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
+
if (async_transid) {
*async_transid = trans->transid;
err = btrfs_commit_transaction_async(trans, root, 1);
@@ -553,10 +576,12 @@ fail:
if (err && !ret)
ret = err;
- if (!ret)
- d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
-out:
- btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
+ if (!ret) {
+ inode = btrfs_lookup_dentry(dir, dentry);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ d_instantiate(dentry, inode);
+ }
return ret;
}
@@ -642,7 +667,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
ret = PTR_ERR(inode);
goto fail;
}
- BUG_ON(!inode);
+
d_instantiate(dentry, inode);
ret = 0;
fail:
@@ -1011,7 +1036,7 @@ out:
static int cluster_pages_for_defrag(struct inode *inode,
struct page **pages,
unsigned long start_index,
- int num_pages)
+ unsigned long num_pages)
{
unsigned long file_end;
u64 isize = i_size_read(inode);
@@ -1169,8 +1194,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
int defrag_count = 0;
int compress_type = BTRFS_COMPRESS_ZLIB;
int extent_thresh = range->extent_thresh;
- int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
- int cluster = max_cluster;
+ unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+ unsigned long cluster = max_cluster;
u64 new_align = ~((u64)128 * 1024 - 1);
struct page **pages = NULL;
@@ -1254,7 +1279,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
break;
if (btrfs_defrag_cancelled(root->fs_info)) {
- printk(KERN_DEBUG "btrfs: defrag_file cancelled\n");
+ printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");
ret = -EAGAIN;
break;
}
@@ -1416,20 +1441,20 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = -EINVAL;
goto out_free;
}
- printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
+ btrfs_info(root->fs_info, "resizing devid %llu", devid);
}
device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
if (!device) {
- printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
+ btrfs_info(root->fs_info, "resizer unable to find device %llu",
devid);
ret = -ENODEV;
goto out_free;
}
if (!device->writeable) {
- printk(KERN_INFO "btrfs: resizer unable to apply on "
- "readonly device %llu\n",
+ btrfs_info(root->fs_info,
+ "resizer unable to apply on readonly device %llu",
devid);
ret = -EPERM;
goto out_free;
@@ -1466,6 +1491,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
}
new_size = old_size - new_size;
} else if (mod > 0) {
+ if (new_size > ULLONG_MAX - old_size) {
+ ret = -EINVAL;
+ goto out_free;
+ }
new_size = old_size + new_size;
}
@@ -1481,7 +1510,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
do_div(new_size, root->sectorsize);
new_size *= root->sectorsize;
- printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
+ printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
rcu_str_deref(device->name), new_size);
if (new_size > old_size) {
@@ -1542,9 +1571,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
src_inode = file_inode(src.file);
if (src_inode->i_sb != file_inode(file)->i_sb) {
- printk(KERN_INFO "btrfs: Snapshot src from "
- "another FS\n");
+ btrfs_info(BTRFS_I(src_inode)->root->fs_info,
+ "Snapshot src from another FS");
ret = -EINVAL;
+ } else if (!inode_owner_or_capable(src_inode)) {
+ /*
+ * Subvolume creation is not restricted, but snapshots
+ * are limited to own subvolumes only
+ */
+ ret = -EPERM;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
@@ -1662,6 +1697,9 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
u64 flags;
int ret = 0;
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
ret = mnt_want_write_file(file);
if (ret)
goto out;
@@ -1686,11 +1724,6 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_drop_write;
}
- if (!inode_owner_or_capable(inode)) {
- ret = -EACCES;
- goto out_drop_write;
- }
-
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
@@ -1698,12 +1731,28 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_drop_sem;
root_flags = btrfs_root_flags(&root->root_item);
- if (flags & BTRFS_SUBVOL_RDONLY)
+ if (flags & BTRFS_SUBVOL_RDONLY) {
btrfs_set_root_flags(&root->root_item,
root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
- else
- btrfs_set_root_flags(&root->root_item,
+ } else {
+ /*
+ * Block RO -> RW transition if this subvolume is involved in
+ * send
+ */
+ spin_lock(&root->root_item_lock);
+ if (root->send_in_progress == 0) {
+ btrfs_set_root_flags(&root->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
+ spin_unlock(&root->root_item_lock);
+ } else {
+ spin_unlock(&root->root_item_lock);
+ btrfs_warn(root->fs_info,
+ "Attempt to set subvolume %llu read-write during send",
+ root->root_key.objectid);
+ ret = -EPERM;
+ goto out_drop_sem;
+ }
+ }
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
@@ -1910,7 +1959,7 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = (u64)-1;
root = btrfs_read_fs_root_no_name(info, &key);
if (IS_ERR(root)) {
- printk(KERN_ERR "could not find root %llu\n",
+ printk(KERN_ERR "BTRFS: could not find root %llu\n",
sk->tree_id);
btrfs_free_path(path);
return -ENOENT;
@@ -2000,7 +2049,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.offset = (u64)-1;
root = btrfs_read_fs_root_no_name(info, &key);
if (IS_ERR(root)) {
- printk(KERN_ERR "could not find root %llu\n", tree_id);
+ printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id);
ret = -ENOENT;
goto out;
}
@@ -2686,14 +2735,11 @@ out_unlock:
#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
static long btrfs_ioctl_file_extent_same(struct file *file,
- void __user *argp)
+ struct btrfs_ioctl_same_args __user *argp)
{
- struct btrfs_ioctl_same_args tmp;
struct btrfs_ioctl_same_args *same;
struct btrfs_ioctl_same_extent_info *info;
- struct inode *src = file->f_dentry->d_inode;
- struct file *dst_file = NULL;
- struct inode *dst;
+ struct inode *src = file_inode(file);
u64 off;
u64 len;
int i;
@@ -2701,6 +2747,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
unsigned long size;
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
bool is_admin = capable(CAP_SYS_ADMIN);
+ u16 count;
if (!(file->f_mode & FMODE_READ))
return -EINVAL;
@@ -2709,17 +2756,14 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
if (ret)
return ret;
- if (copy_from_user(&tmp,
- (struct btrfs_ioctl_same_args __user *)argp,
- sizeof(tmp))) {
+ if (get_user(count, &argp->dest_count)) {
ret = -EFAULT;
goto out;
}
- size = sizeof(tmp) +
- tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
+ size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
- same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size);
+ same = memdup_user(argp, size);
if (IS_ERR(same)) {
ret = PTR_ERR(same);
@@ -2756,52 +2800,35 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
goto out;
/* pre-format output fields to sane values */
- for (i = 0; i < same->dest_count; i++) {
+ for (i = 0; i < count; i++) {
same->info[i].bytes_deduped = 0ULL;
same->info[i].status = 0;
}
- ret = 0;
- for (i = 0; i < same->dest_count; i++) {
- info = &same->info[i];
-
- dst_file = fget(info->fd);
- if (!dst_file) {
+ for (i = 0, info = same->info; i < count; i++, info++) {
+ struct inode *dst;
+ struct fd dst_file = fdget(info->fd);
+ if (!dst_file.file) {
info->status = -EBADF;
- goto next;
+ continue;
}
+ dst = file_inode(dst_file.file);
- if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+ if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
info->status = -EINVAL;
- goto next;
- }
-
- info->status = -EXDEV;
- if (file->f_path.mnt != dst_file->f_path.mnt)
- goto next;
-
- dst = dst_file->f_dentry->d_inode;
- if (src->i_sb != dst->i_sb)
- goto next;
-
- if (S_ISDIR(dst->i_mode)) {
+ } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
+ info->status = -EXDEV;
+ } else if (S_ISDIR(dst->i_mode)) {
info->status = -EISDIR;
- goto next;
- }
-
- if (!S_ISREG(dst->i_mode)) {
+ } else if (!S_ISREG(dst->i_mode)) {
info->status = -EACCES;
- goto next;
+ } else {
+ info->status = btrfs_extent_same(src, off, len, dst,
+ info->logical_offset);
+ if (info->status == 0)
+ info->bytes_deduped += len;
}
-
- info->status = btrfs_extent_same(src, off, len, dst,
- info->logical_offset);
- if (info->status == 0)
- info->bytes_deduped += len;
-
-next:
- if (dst_file)
- fput(dst_file);
+ fdput(dst_file);
}
ret = copy_to_user(argp, same, size);
@@ -2860,12 +2887,14 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
* note the key will change type as we walk through the
* tree.
*/
+ path->leave_spinning = 1;
ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
0, 0);
if (ret < 0)
goto out;
nritems = btrfs_header_nritems(path->nodes[0]);
+process_slot:
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
if (ret < 0)
@@ -2892,11 +2921,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
u8 comp;
u64 endoff;
- size = btrfs_item_size_nr(leaf, slot);
- read_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
-
extent = btrfs_item_ptr(leaf, slot,
struct btrfs_file_extent_item);
comp = btrfs_file_extent_compression(leaf, extent);
@@ -2915,11 +2939,20 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
datal = btrfs_file_extent_ram_bytes(leaf,
extent);
}
- btrfs_release_path(path);
if (key.offset + datal <= off ||
- key.offset >= off + len - 1)
- goto next;
+ key.offset >= off + len - 1) {
+ path->slots[0]++;
+ goto process_slot;
+ }
+
+ size = btrfs_item_size_nr(leaf, slot);
+ read_extent_buffer(leaf, buf,
+ btrfs_item_ptr_offset(leaf, slot),
+ size);
+
+ btrfs_release_path(path);
+ path->leave_spinning = 0;
memcpy(&new_key, &key, sizeof(new_key));
new_key.objectid = btrfs_ino(inode);
@@ -3090,7 +3123,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
}
ret = btrfs_end_transaction(trans, root);
}
-next:
btrfs_release_path(path);
key.offset++;
}
@@ -3218,9 +3250,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
out_unlock:
- mutex_unlock(&src->i_mutex);
- if (!same_inode)
- mutex_unlock(&inode->i_mutex);
+ if (!same_inode) {
+ if (inode < src) {
+ mutex_unlock(&src->i_mutex);
+ mutex_unlock(&inode->i_mutex);
+ } else {
+ mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&src->i_mutex);
+ }
+ } else {
+ mutex_unlock(&src->i_mutex);
+ }
out_fput:
fdput(src_file);
out_drop_write:
@@ -3343,8 +3383,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
if (IS_ERR_OR_NULL(di)) {
btrfs_free_path(path);
btrfs_end_transaction(trans, root);
- printk(KERN_ERR "Umm, you don't have the default dir item, "
- "this isn't going to work\n");
+ btrfs_err(new_root->fs_info, "Umm, you don't have the default dir"
+ "item, this isn't going to work");
ret = -ENOENT;
goto out;
}
@@ -4325,6 +4365,9 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
int ret = 0;
int received_uuid_changed;
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
ret = mnt_want_write_file(file);
if (ret < 0)
return ret;
@@ -4341,11 +4384,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
goto out;
}
- if (!inode_owner_or_capable(inode)) {
- ret = -EACCES;
- goto out;
- }
-
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa)) {
ret = PTR_ERR(sa);
@@ -4431,8 +4469,8 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
len = strnlen(label, BTRFS_LABEL_SIZE);
if (len == BTRFS_LABEL_SIZE) {
- pr_warn("btrfs: label is too long, return the first %zu bytes\n",
- --len);
+ btrfs_warn(root->fs_info,
+ "label is too long, return the first %zu bytes", --len);
}
ret = copy_to_user(arg, label, len);
@@ -4455,7 +4493,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
return -EFAULT;
if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
- pr_err("btrfs: unable to set label with more than %d bytes\n",
+ btrfs_err(root->fs_info, "unable to set label with more than %d bytes",
BTRFS_LABEL_SIZE - 1);
return -EINVAL;
}
@@ -4473,13 +4511,173 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
spin_lock(&root->fs_info->super_lock);
strcpy(super_block->label, label);
spin_unlock(&root->fs_info->super_lock);
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans, root);
out_unlock:
mnt_drop_write_file(file);
return ret;
}
+#define INIT_FEATURE_FLAGS(suffix) \
+ { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
+ .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
+ .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
+
+static int btrfs_ioctl_get_supported_features(struct file *file,
+ void __user *arg)
+{
+ static struct btrfs_ioctl_feature_flags features[3] = {
+ INIT_FEATURE_FLAGS(SUPP),
+ INIT_FEATURE_FLAGS(SAFE_SET),
+ INIT_FEATURE_FLAGS(SAFE_CLEAR)
+ };
+
+ if (copy_to_user(arg, &features, sizeof(features)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
+{
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct btrfs_super_block *super_block = root->fs_info->super_copy;
+ struct btrfs_ioctl_feature_flags features;
+
+ features.compat_flags = btrfs_super_compat_flags(super_block);
+ features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block);
+ features.incompat_flags = btrfs_super_incompat_flags(super_block);
+
+ if (copy_to_user(arg, &features, sizeof(features)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int check_feature_bits(struct btrfs_root *root,
+ enum btrfs_feature_set set,
+ u64 change_mask, u64 flags, u64 supported_flags,
+ u64 safe_set, u64 safe_clear)
+{
+ const char *type = btrfs_feature_set_names[set];
+ char *names;
+ u64 disallowed, unsupported;
+ u64 set_mask = flags & change_mask;
+ u64 clear_mask = ~flags & change_mask;
+
+ unsupported = set_mask & ~supported_flags;
+ if (unsupported) {
+ names = btrfs_printable_features(set, unsupported);
+ if (names) {
+ btrfs_warn(root->fs_info,
+ "this kernel does not support the %s feature bit%s",
+ names, strchr(names, ',') ? "s" : "");
+ kfree(names);
+ } else
+ btrfs_warn(root->fs_info,
+ "this kernel does not support %s bits 0x%llx",
+ type, unsupported);
+ return -EOPNOTSUPP;
+ }
+
+ disallowed = set_mask & ~safe_set;
+ if (disallowed) {
+ names = btrfs_printable_features(set, disallowed);
+ if (names) {
+ btrfs_warn(root->fs_info,
+ "can't set the %s feature bit%s while mounted",
+ names, strchr(names, ',') ? "s" : "");
+ kfree(names);
+ } else
+ btrfs_warn(root->fs_info,
+ "can't set %s bits 0x%llx while mounted",
+ type, disallowed);
+ return -EPERM;
+ }
+
+ disallowed = clear_mask & ~safe_clear;
+ if (disallowed) {
+ names = btrfs_printable_features(set, disallowed);
+ if (names) {
+ btrfs_warn(root->fs_info,
+ "can't clear the %s feature bit%s while mounted",
+ names, strchr(names, ',') ? "s" : "");
+ kfree(names);
+ } else
+ btrfs_warn(root->fs_info,
+ "can't clear %s bits 0x%llx while mounted",
+ type, disallowed);
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+#define check_feature(root, change_mask, flags, mask_base) \
+check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \
+ BTRFS_FEATURE_ ## mask_base ## _SUPP, \
+ BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \
+ BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
+
+static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
+{
+ struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
+ struct btrfs_super_block *super_block = root->fs_info->super_copy;
+ struct btrfs_ioctl_feature_flags flags[2];
+ struct btrfs_trans_handle *trans;
+ u64 newflags;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(flags, arg, sizeof(flags)))
+ return -EFAULT;
+
+ /* Nothing to do */
+ if (!flags[0].compat_flags && !flags[0].compat_ro_flags &&
+ !flags[0].incompat_flags)
+ return 0;
+
+ ret = check_feature(root, flags[0].compat_flags,
+ flags[1].compat_flags, COMPAT);
+ if (ret)
+ return ret;
+
+ ret = check_feature(root, flags[0].compat_ro_flags,
+ flags[1].compat_ro_flags, COMPAT_RO);
+ if (ret)
+ return ret;
+
+ ret = check_feature(root, flags[0].incompat_flags,
+ flags[1].incompat_flags, INCOMPAT);
+ if (ret)
+ return ret;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ spin_lock(&root->fs_info->super_lock);
+ newflags = btrfs_super_compat_flags(super_block);
+ newflags |= flags[0].compat_flags & flags[1].compat_flags;
+ newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags);
+ btrfs_set_super_compat_flags(super_block, newflags);
+
+ newflags = btrfs_super_compat_ro_flags(super_block);
+ newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags;
+ newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags);
+ btrfs_set_super_compat_ro_flags(super_block, newflags);
+
+ newflags = btrfs_super_incompat_flags(super_block);
+ newflags |= flags[0].incompat_flags & flags[1].incompat_flags;
+ newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags);
+ btrfs_set_super_incompat_flags(super_block, newflags);
+ spin_unlock(&root->fs_info->super_lock);
+
+ return btrfs_commit_transaction(trans, root);
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@@ -4598,6 +4796,12 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_fslabel(file, argp);
case BTRFS_IOC_FILE_EXTENT_SAME:
return btrfs_ioctl_file_extent_same(file, argp);
+ case BTRFS_IOC_GET_SUPPORTED_FEATURES:
+ return btrfs_ioctl_get_supported_features(file, argp);
+ case BTRFS_IOC_GET_FEATURES:
+ return btrfs_ioctl_get_features(file, argp);
+ case BTRFS_IOC_SET_FEATURES:
+ return btrfs_ioctl_set_features(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index b6a6f07c5ce2..b47f669aca75 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -141,7 +141,7 @@ static int lzo_compress_pages(struct list_head *ws,
ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
&out_len, workspace->mem);
if (ret != LZO_E_OK) {
- printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
+ printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
ret);
ret = -1;
goto out;
@@ -357,7 +357,7 @@ cont:
if (need_unmap)
kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
- printk(KERN_WARNING "btrfs decompress failed\n");
+ printk(KERN_WARNING "BTRFS: decompress failed\n");
ret = -1;
break;
}
@@ -401,7 +401,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
out_len = PAGE_CACHE_SIZE;
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
if (ret != LZO_E_OK) {
- printk(KERN_WARNING "btrfs decompress failed!\n");
+ printk(KERN_WARNING "BTRFS: decompress failed!\n");
ret = -1;
goto out;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 69582d5b69d1..b16450b840e7 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -336,13 +336,14 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
entry->len);
*file_offset = dec_end;
if (dec_start > dec_end) {
- printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
- dec_start, dec_end);
+ btrfs_crit(BTRFS_I(inode)->root->fs_info,
+ "bad ordering dec_start %llu end %llu", dec_start, dec_end);
}
to_dec = dec_end - dec_start;
if (to_dec > entry->bytes_left) {
- printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
- entry->bytes_left, to_dec);
+ btrfs_crit(BTRFS_I(inode)->root->fs_info,
+ "bad ordered accounting left %llu size %llu",
+ entry->bytes_left, to_dec);
}
entry->bytes_left -= to_dec;
if (!uptodate)
@@ -401,7 +402,8 @@ have_entry:
}
if (io_size > entry->bytes_left) {
- printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+ btrfs_crit(BTRFS_I(inode)->root->fs_info,
+ "bad ordered accounting left %llu size %llu",
entry->bytes_left, io_size);
}
entry->bytes_left -= io_size;
@@ -520,7 +522,8 @@ void btrfs_remove_ordered_extent(struct inode *inode,
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
- tree->last = NULL;
+ if (tree->last == node)
+ tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_unlock_irq(&tree->lock);
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 24cad1695af7..65793edb38ca 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -69,23 +69,3 @@ out:
btrfs_free_path(path);
return ret;
}
-
-int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- int ret;
-
- key.objectid = BTRFS_ORPHAN_OBJECTID;
- key.type = BTRFS_ORPHAN_ITEM_KEY;
- key.offset = offset;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-
- btrfs_free_path(path);
- return ret;
-}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 417053b17181..6efd70d3b64f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -154,7 +154,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
u32 item_size)
{
if (!IS_ALIGNED(item_size, sizeof(u64))) {
- pr_warn("btrfs: uuid item with illegal size %lu!\n",
+ pr_warn("BTRFS: uuid item with illegal size %lu!\n",
(unsigned long)item_size);
return;
}
@@ -249,7 +249,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
BTRFS_FILE_EXTENT_INLINE) {
printk(KERN_INFO "\t\tinline extent data "
"size %u\n",
- btrfs_file_extent_inline_len(l, fi));
+ btrfs_file_extent_inline_len(l, i, fi));
break;
}
printk(KERN_INFO "\t\textent data disk bytenr %llu "
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
new file mode 100644
index 000000000000..129b1dd28527
--- /dev/null
+++ b/fs/btrfs/props.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/hashtable.h>
+#include "props.h"
+#include "btrfs_inode.h"
+#include "hash.h"
+#include "transaction.h"
+#include "xattr.h"
+
+#define BTRFS_PROP_HANDLERS_HT_BITS 8
+static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
+
+struct prop_handler {
+ struct hlist_node node;
+ const char *xattr_name;
+ int (*validate)(const char *value, size_t len);
+ int (*apply)(struct inode *inode, const char *value, size_t len);
+ const char *(*extract)(struct inode *inode);
+ int inheritable;
+};
+
+static int prop_compression_validate(const char *value, size_t len);
+static int prop_compression_apply(struct inode *inode,
+ const char *value,
+ size_t len);
+static const char *prop_compression_extract(struct inode *inode);
+
+static struct prop_handler prop_handlers[] = {
+ {
+ .xattr_name = XATTR_BTRFS_PREFIX "compression",
+ .validate = prop_compression_validate,
+ .apply = prop_compression_apply,
+ .extract = prop_compression_extract,
+ .inheritable = 1
+ },
+ {
+ .xattr_name = NULL
+ }
+};
+
+void __init btrfs_props_init(void)
+{
+ struct prop_handler *p;
+
+ hash_init(prop_handlers_ht);
+
+ for (p = &prop_handlers[0]; p->xattr_name; p++) {
+ u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
+
+ hash_add(prop_handlers_ht, &p->node, h);
+ }
+}
+
+static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
+{
+ struct hlist_head *h;
+
+ h = &prop_handlers_ht[hash_min(hash, BTRFS_PROP_HANDLERS_HT_BITS)];
+ if (hlist_empty(h))
+ return NULL;
+
+ return h;
+}
+
+static const struct prop_handler *
+find_prop_handler(const char *name,
+ const struct hlist_head *handlers)
+{
+ struct prop_handler *h;
+
+ if (!handlers) {
+ u64 hash = btrfs_name_hash(name, strlen(name));
+
+ handlers = find_prop_handlers_by_hash(hash);
+ if (!handlers)
+ return NULL;
+ }
+
+ hlist_for_each_entry(h, handlers, node)
+ if (!strcmp(h->xattr_name, name))
+ return h;
+
+ return NULL;
+}
+
+static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ const char *name,
+ const char *value,
+ size_t value_len,
+ int flags)
+{
+ const struct prop_handler *handler;
+ int ret;
+
+ if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
+ return -EINVAL;
+
+ handler = find_prop_handler(name, NULL);
+ if (!handler)
+ return -EINVAL;
+
+ if (value_len == 0) {
+ ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
+ NULL, 0, flags);
+ if (ret)
+ return ret;
+
+ ret = handler->apply(inode, NULL, 0);
+ ASSERT(ret == 0);
+
+ return ret;
+ }
+
+ ret = handler->validate(value, value_len);
+ if (ret)
+ return ret;
+ ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
+ value, value_len, flags);
+ if (ret)
+ return ret;
+ ret = handler->apply(inode, value, value_len);
+ if (ret) {
+ __btrfs_setxattr(trans, inode, handler->xattr_name,
+ NULL, 0, flags);
+ return ret;
+ }
+
+ set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
+
+ return 0;
+}
+
+int btrfs_set_prop(struct inode *inode,
+ const char *name,
+ const char *value,
+ size_t value_len,
+ int flags)
+{
+ return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
+}
+
+static int iterate_object_props(struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 objectid,
+ void (*iterator)(void *,
+ const struct prop_handler *,
+ const char *,
+ size_t),
+ void *ctx)
+{
+ int ret;
+ char *name_buf = NULL;
+ char *value_buf = NULL;
+ int name_buf_len = 0;
+ int value_buf_len = 0;
+
+ while (1) {
+ struct btrfs_key key;
+ struct btrfs_dir_item *di;
+ struct extent_buffer *leaf;
+ u32 total_len, cur, this_len;
+ int slot;
+ const struct hlist_head *handlers;
+
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != objectid)
+ break;
+ if (key.type != BTRFS_XATTR_ITEM_KEY)
+ break;
+
+ handlers = find_prop_handlers_by_hash(key.offset);
+ if (!handlers)
+ goto next_slot;
+
+ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+ cur = 0;
+ total_len = btrfs_item_size_nr(leaf, slot);
+
+ while (cur < total_len) {
+ u32 name_len = btrfs_dir_name_len(leaf, di);
+ u32 data_len = btrfs_dir_data_len(leaf, di);
+ unsigned long name_ptr, data_ptr;
+ const struct prop_handler *handler;
+
+ this_len = sizeof(*di) + name_len + data_len;
+ name_ptr = (unsigned long)(di + 1);
+ data_ptr = name_ptr + name_len;
+
+ if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
+ memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
+ name_ptr,
+ XATTR_BTRFS_PREFIX_LEN))
+ goto next_dir_item;
+
+ if (name_len >= name_buf_len) {
+ kfree(name_buf);
+ name_buf_len = name_len + 1;
+ name_buf = kmalloc(name_buf_len, GFP_NOFS);
+ if (!name_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ read_extent_buffer(leaf, name_buf, name_ptr, name_len);
+ name_buf[name_len] = '\0';
+
+ handler = find_prop_handler(name_buf, handlers);
+ if (!handler)
+ goto next_dir_item;
+
+ if (data_len > value_buf_len) {
+ kfree(value_buf);
+ value_buf_len = data_len;
+ value_buf = kmalloc(data_len, GFP_NOFS);
+ if (!value_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ read_extent_buffer(leaf, value_buf, data_ptr, data_len);
+
+ iterator(ctx, handler, value_buf, data_len);
+next_dir_item:
+ cur += this_len;
+ di = (struct btrfs_dir_item *)((char *) di + this_len);
+ }
+
+next_slot:
+ path->slots[0]++;
+ }
+
+ ret = 0;
+out:
+ btrfs_release_path(path);
+ kfree(name_buf);
+ kfree(value_buf);
+
+ return ret;
+}
+
+static void inode_prop_iterator(void *ctx,
+ const struct prop_handler *handler,
+ const char *value,
+ size_t len)
+{
+ struct inode *inode = ctx;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ ret = handler->apply(inode, value, len);
+ if (unlikely(ret))
+ btrfs_warn(root->fs_info,
+ "error applying prop %s to ino %llu (root %llu): %d",
+ handler->xattr_name, btrfs_ino(inode),
+ root->root_key.objectid, ret);
+ else
+ set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
+}
+
+int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 ino = btrfs_ino(inode);
+ int ret;
+
+ ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
+
+ return ret;
+}
+
+static int inherit_props(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ struct inode *parent)
+{
+ const struct prop_handler *h;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ if (!test_bit(BTRFS_INODE_HAS_PROPS,
+ &BTRFS_I(parent)->runtime_flags))
+ return 0;
+
+ for (h = &prop_handlers[0]; h->xattr_name; h++) {
+ const char *value;
+ u64 num_bytes;
+
+ if (!h->inheritable)
+ continue;
+
+ value = h->extract(parent);
+ if (!value)
+ continue;
+
+ num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+ ret = btrfs_block_rsv_add(root, trans->block_rsv,
+ num_bytes, BTRFS_RESERVE_NO_FLUSH);
+ if (ret)
+ goto out;
+ ret = __btrfs_set_prop(trans, inode, h->xattr_name,
+ value, strlen(value), 0);
+ btrfs_block_rsv_release(root, trans->block_rsv, num_bytes);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ struct inode *dir)
+{
+ if (!dir)
+ return 0;
+
+ return inherit_props(trans, inode, dir);
+}
+
+int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_root *parent_root)
+{
+ struct btrfs_key key;
+ struct inode *parent_inode, *child_inode;
+ int ret;
+
+ key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ parent_inode = btrfs_iget(parent_root->fs_info->sb, &key,
+ parent_root, NULL);
+ if (IS_ERR(parent_inode))
+ return PTR_ERR(parent_inode);
+
+ child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+ if (IS_ERR(child_inode)) {
+ iput(parent_inode);
+ return PTR_ERR(child_inode);
+ }
+
+ ret = inherit_props(trans, child_inode, parent_inode);
+ iput(child_inode);
+ iput(parent_inode);
+
+ return ret;
+}
+
+static int prop_compression_validate(const char *value, size_t len)
+{
+ if (!strncmp("lzo", value, len))
+ return 0;
+ else if (!strncmp("zlib", value, len))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int prop_compression_apply(struct inode *inode,
+ const char *value,
+ size_t len)
+{
+ int type;
+
+ if (len == 0) {
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+ BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
+
+ return 0;
+ }
+
+ if (!strncmp("lzo", value, len))
+ type = BTRFS_COMPRESS_LZO;
+ else if (!strncmp("zlib", value, len))
+ type = BTRFS_COMPRESS_ZLIB;
+ else
+ return -EINVAL;
+
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
+ BTRFS_I(inode)->force_compress = type;
+
+ return 0;
+}
+
+static const char *prop_compression_extract(struct inode *inode)
+{
+ switch (BTRFS_I(inode)->force_compress) {
+ case BTRFS_COMPRESS_ZLIB:
+ return "zlib";
+ case BTRFS_COMPRESS_LZO:
+ return "lzo";
+ }
+
+ return NULL;
+}
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
new file mode 100644
index 000000000000..100f18829d50
--- /dev/null
+++ b/fs/btrfs/props.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_PROPS_H
+#define __BTRFS_PROPS_H
+
+#include "ctree.h"
+
+void __init btrfs_props_init(void);
+
+int btrfs_set_prop(struct inode *inode,
+ const char *name,
+ const char *value,
+ size_t value_len,
+ int flags);
+
+int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
+
+int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ struct inode *dir);
+
+int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_root *parent_root);
+
+#endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4e6ef490619e..472302a2d745 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -301,16 +301,16 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
if (btrfs_qgroup_status_version(l, ptr) !=
BTRFS_QGROUP_STATUS_VERSION) {
- printk(KERN_ERR
- "btrfs: old qgroup version, quota disabled\n");
+ btrfs_err(fs_info,
+ "old qgroup version, quota disabled");
goto out;
}
if (btrfs_qgroup_status_generation(l, ptr) !=
fs_info->generation) {
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- printk(KERN_ERR
- "btrfs: qgroup generation mismatch, "
- "marked as inconsistent\n");
+ btrfs_err(fs_info,
+ "qgroup generation mismatch, "
+ "marked as inconsistent");
}
fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
ptr);
@@ -325,7 +325,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
qgroup = find_qgroup_rb(fs_info, found_key.offset);
if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
- printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
+ btrfs_err(fs_info, "inconsitent qgroup config");
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
}
if (!qgroup) {
@@ -396,8 +396,8 @@ next1:
ret = add_relation_rb(fs_info, found_key.objectid,
found_key.offset);
if (ret == -ENOENT) {
- printk(KERN_WARNING
- "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
+ btrfs_warn(fs_info,
+ "orphan qgroup relation 0x%llx->0x%llx",
found_key.objectid, found_key.offset);
ret = 0; /* ignore the error */
}
@@ -644,8 +644,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
l = path->nodes[0];
slot = path->slots[0];
- qgroup_limit = btrfs_item_ptr(l, path->slots[0],
- struct btrfs_qgroup_limit_item);
+ qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
@@ -687,8 +686,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
l = path->nodes[0];
slot = path->slots[0];
- qgroup_info = btrfs_item_ptr(l, path->slots[0],
- struct btrfs_qgroup_info_item);
+ qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
@@ -1161,7 +1159,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
limit->rsv_excl);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- printk(KERN_INFO "unable to update quota limit for %llu\n",
+ btrfs_info(fs_info, "unable to update quota limit for %llu",
qgroupid);
}
@@ -1349,7 +1347,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op)
{
- struct btrfs_key ins;
struct btrfs_root *quota_root;
u64 ref_root;
struct btrfs_qgroup *qgroup;
@@ -1363,10 +1360,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
BUG_ON(!fs_info->quota_root);
- ins.objectid = node->bytenr;
- ins.offset = node->num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
-
if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
struct btrfs_delayed_tree_ref *ref;
@@ -1840,7 +1833,9 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
return;
- pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
+ btrfs_err(trans->root->fs_info,
+ "qgroups not uptodate in trans handle %p: list is%s empty, "
+ "seq is %#x.%x",
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
(u32)(trans->delayed_ref_elem.seq >> 32),
(u32)trans->delayed_ref_elem.seq);
@@ -1902,9 +1897,17 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
mutex_unlock(&fs_info->qgroup_rescan_lock);
for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+ u64 num_bytes;
+
btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
- if (found.type != BTRFS_EXTENT_ITEM_KEY)
+ if (found.type != BTRFS_EXTENT_ITEM_KEY &&
+ found.type != BTRFS_METADATA_ITEM_KEY)
continue;
+ if (found.type == BTRFS_METADATA_ITEM_KEY)
+ num_bytes = fs_info->extent_root->leafsize;
+ else
+ num_bytes = found.offset;
+
ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
tree_mod_seq_elem.seq, &roots);
if (ret < 0)
@@ -1949,12 +1952,12 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
struct btrfs_qgroup_list *glist;
qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
- qg->rfer += found.offset;
- qg->rfer_cmpr += found.offset;
+ qg->rfer += num_bytes;
+ qg->rfer_cmpr += num_bytes;
WARN_ON(qg->tag >= seq);
if (qg->refcnt - seq == roots->nnodes) {
- qg->excl += found.offset;
- qg->excl_cmpr += found.offset;
+ qg->excl += num_bytes;
+ qg->excl_cmpr += num_bytes;
}
qgroup_dirty(fs_info, qg);
@@ -2037,10 +2040,10 @@ out:
mutex_unlock(&fs_info->qgroup_rescan_lock);
if (err >= 0) {
- pr_info("btrfs: qgroup scan completed%s\n",
+ btrfs_info(fs_info, "qgroup scan completed%s",
err == 2 ? " (inconsistency flag cleared)" : "");
} else {
- pr_err("btrfs: qgroup scan failed with %d\n", err);
+ btrfs_err(fs_info, "qgroup scan failed with %d", err);
}
complete_all(&fs_info->qgroup_rescan_completion);
@@ -2096,7 +2099,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
if (ret) {
err:
- pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret);
+ btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
return ret;
}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 24ac21840a9a..9af0b25d991a 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1032,8 +1032,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* see if we can add this page onto our existing bio */
if (last) {
- last_end = (u64)last->bi_sector << 9;
- last_end += last->bi_size;
+ last_end = (u64)last->bi_iter.bi_sector << 9;
+ last_end += last->bi_iter.bi_size;
/*
* we can't merge these if they are from different
@@ -1053,9 +1053,9 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
if (!bio)
return -ENOMEM;
- bio->bi_size = 0;
+ bio->bi_iter.bi_size = 0;
bio->bi_bdev = stripe->dev->bdev;
- bio->bi_sector = disk_start >> 9;
+ bio->bi_iter.bi_sector = disk_start >> 9;
set_bit(BIO_UPTODATE, &bio->bi_flags);
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
@@ -1111,7 +1111,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
spin_lock_irq(&rbio->bio_list_lock);
bio_list_for_each(bio, &rbio->bio_list) {
- start = (u64)bio->bi_sector << 9;
+ start = (u64)bio->bi_iter.bi_sector << 9;
stripe_offset = start - rbio->raid_map[0];
page_index = stripe_offset >> PAGE_CACHE_SHIFT;
@@ -1272,7 +1272,7 @@ cleanup:
static int find_bio_stripe(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
- u64 physical = bio->bi_sector;
+ u64 physical = bio->bi_iter.bi_sector;
u64 stripe_start;
int i;
struct btrfs_bio_stripe *stripe;
@@ -1298,7 +1298,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
- u64 logical = bio->bi_sector;
+ u64 logical = bio->bi_iter.bi_sector;
u64 stripe_start;
int i;
@@ -1602,8 +1602,8 @@ static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
plug_list);
struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
plug_list);
- u64 a_sector = ra->bio_list.head->bi_sector;
- u64 b_sector = rb->bio_list.head->bi_sector;
+ u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
+ u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
if (a_sector < b_sector)
return -1;
@@ -1691,7 +1691,7 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
if (IS_ERR(rbio))
return PTR_ERR(rbio);
bio_list_add(&rbio->bio_list, bio);
- rbio->bio_list_bytes = bio->bi_size;
+ rbio->bio_list_bytes = bio->bi_iter.bi_size;
/*
* don't plug on full rbios, just get them out the door
@@ -2044,7 +2044,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
rbio->read_rebuild = 1;
bio_list_add(&rbio->bio_list, bio);
- rbio->bio_list_bytes = bio->bi_size;
+ rbio->bio_list_bytes = bio->bi_iter.bi_size;
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 1031b69252c5..31c797c48c3e 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -189,8 +189,8 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
*/
#ifdef DEBUG
if (rec->generation != generation) {
- printk(KERN_DEBUG "generation mismatch for "
- "(%llu,%d,%llu) %llu != %llu\n",
+ btrfs_debug(root->fs_info,
+ "generation mismatch for (%llu,%d,%llu) %llu != %llu",
key.objectid, key.type, key.offset,
rec->generation, generation);
}
@@ -365,8 +365,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
goto error;
if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
- printk(KERN_ERR "btrfs readahead: more than %d copies not "
- "supported", BTRFS_MAX_MIRRORS);
+ btrfs_err(root->fs_info,
+ "readahead: more than %d copies not supported",
+ BTRFS_MAX_MIRRORS);
goto error;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 429c73c374b8..07b3b36f40ee 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -94,6 +94,7 @@ struct backref_edge {
#define LOWER 0
#define UPPER 1
+#define RELOCATION_RESERVED_NODES 256
struct backref_cache {
/* red black tree of all backref nodes in the cache */
@@ -176,6 +177,8 @@ struct reloc_control {
u64 merging_rsv_size;
/* size of relocated tree nodes */
u64 nodes_relocated;
+ /* reserved size for block group relocation */
+ u64 reserved_bytes;
u64 search_start;
u64 extents_found;
@@ -184,7 +187,6 @@ struct reloc_control {
unsigned int create_reloc_tree:1;
unsigned int merge_reloc_tree:1;
unsigned int found_file_extent:1;
- unsigned int commit_transaction:1;
};
/* stages of data relocation */
@@ -2309,9 +2311,6 @@ void free_reloc_roots(struct list_head *list)
reloc_root = list_entry(list->next, struct btrfs_root,
root_list);
__del_reloc_root(reloc_root);
- free_extent_buffer(reloc_root->node);
- free_extent_buffer(reloc_root->commit_root);
- kfree(reloc_root);
}
}
@@ -2353,10 +2352,9 @@ again:
ret = merge_reloc_root(rc, root);
if (ret) {
- __del_reloc_root(reloc_root);
- free_extent_buffer(reloc_root->node);
- free_extent_buffer(reloc_root->commit_root);
- kfree(reloc_root);
+ if (list_empty(&reloc_root->root_list))
+ list_add_tail(&reloc_root->root_list,
+ &reloc_roots);
goto out;
}
} else {
@@ -2452,7 +2450,7 @@ static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
struct reloc_control *rc,
struct backref_node *node,
- struct backref_edge *edges[], int *nr)
+ struct backref_edge *edges[])
{
struct backref_node *next;
struct btrfs_root *root;
@@ -2494,7 +2492,6 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
if (!root)
return NULL;
- *nr = index;
next = node;
/* setup backref node path for btrfs_reloc_cow_block */
while (1) {
@@ -2590,28 +2587,36 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
struct btrfs_root *root = rc->extent_root;
u64 num_bytes;
int ret;
+ u64 tmp;
num_bytes = calcu_metadata_size(rc, node, 1) * 2;
trans->block_rsv = rc->block_rsv;
- ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
- BTRFS_RESERVE_FLUSH_ALL);
+ rc->reserved_bytes += num_bytes;
+ ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
+ BTRFS_RESERVE_FLUSH_ALL);
if (ret) {
- if (ret == -EAGAIN)
- rc->commit_transaction = 1;
+ if (ret == -EAGAIN) {
+ tmp = rc->extent_root->nodesize *
+ RELOCATION_RESERVED_NODES;
+ while (tmp <= rc->reserved_bytes)
+ tmp <<= 1;
+ /*
+ * only one thread can access block_rsv at this point,
+ * so we don't need to hold a lock to protect block_rsv.
+ * we expand the reservation size here to allow enough
+ * space for relocation, and we will return earlier in
+ * the enospc case.
+ */
+ rc->block_rsv->size = tmp + rc->extent_root->nodesize *
+ RELOCATION_RESERVED_NODES;
+ }
return ret;
}
return 0;
}
-static void release_metadata_space(struct reloc_control *rc,
- struct backref_node *node)
-{
- u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
- btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
-}
-
/*
* relocate a block tree, and then update pointers in upper level
* blocks that reference the block to point to the new location.
@@ -2633,7 +2638,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
u32 blocksize;
u64 bytenr;
u64 generation;
- int nr;
int slot;
int ret;
int err = 0;
@@ -2646,7 +2650,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
cond_resched();
upper = edge->node[UPPER];
- root = select_reloc_root(trans, rc, upper, edges, &nr);
+ root = select_reloc_root(trans, rc, upper, edges);
BUG_ON(!root);
if (upper->eb && !upper->locked) {
@@ -2898,7 +2902,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_path *path)
{
struct btrfs_root *root;
- int release = 0;
int ret = 0;
if (!node)
@@ -2915,7 +2918,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
ret = reserve_metadata_space(trans, rc, node);
if (ret)
goto out;
- release = 1;
}
if (root) {
@@ -2940,11 +2942,8 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
ret = do_relocation(trans, rc, node, key, path, 1);
}
out:
- if (ret || node->level == 0 || node->cowonly) {
- if (release)
- release_metadata_space(rc, node);
+ if (ret || node->level == 0 || node->cowonly)
remove_backref_node(&rc->backref_cache, node);
- }
return ret;
}
@@ -3867,29 +3866,20 @@ static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{
struct btrfs_trans_handle *trans;
- int ret;
rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
BTRFS_BLOCK_RSV_TEMP);
if (!rc->block_rsv)
return -ENOMEM;
- /*
- * reserve some space for creating reloc trees.
- * btrfs_init_reloc_root will use them when there
- * is no reservation in transaction handle.
- */
- ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
- rc->extent_root->nodesize * 256,
- BTRFS_RESERVE_FLUSH_ALL);
- if (ret)
- return ret;
-
memset(&rc->cluster, 0, sizeof(rc->cluster));
rc->search_start = rc->block_group->key.objectid;
rc->extents_found = 0;
rc->nodes_relocated = 0;
rc->merging_rsv_size = 0;
+ rc->reserved_bytes = 0;
+ rc->block_rsv->size = rc->extent_root->nodesize *
+ RELOCATION_RESERVED_NODES;
rc->create_reloc_tree = 1;
set_reloc_control(rc);
@@ -3933,6 +3923,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
while (1) {
+ rc->reserved_bytes = 0;
+ ret = btrfs_block_rsv_refill(rc->extent_root,
+ rc->block_rsv, rc->block_rsv->size,
+ BTRFS_RESERVE_FLUSH_ALL);
+ if (ret) {
+ err = ret;
+ break;
+ }
progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
if (IS_ERR(trans)) {
@@ -4011,6 +4009,12 @@ restart:
if (!RB_EMPTY_ROOT(&blocks)) {
ret = relocate_tree_blocks(trans, rc, &blocks);
if (ret < 0) {
+ /*
+ * if we fail to relocate tree blocks, force an update of
+ * the backref cache when committing the transaction.
+ */
+ rc->backref_cache.last_trans = trans->transid - 1;
+
if (ret != -EAGAIN) {
err = ret;
break;
@@ -4020,14 +4024,8 @@ restart:
}
}
- if (rc->commit_transaction) {
- rc->commit_transaction = 0;
- ret = btrfs_commit_transaction(trans, rc->extent_root);
- BUG_ON(ret);
- } else {
- btrfs_end_transaction_throttle(trans, rc->extent_root);
- btrfs_btree_balance_dirty(rc->extent_root);
- }
+ btrfs_end_transaction_throttle(trans, rc->extent_root);
+ btrfs_btree_balance_dirty(rc->extent_root);
trans = NULL;
if (rc->stage == MOVE_DATA_EXTENTS &&
@@ -4247,7 +4245,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
goto out;
}
- printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
+ btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu",
rc->block_group->key.objectid, rc->block_group->flags);
ret = btrfs_start_delalloc_roots(fs_info, 0);
@@ -4269,7 +4267,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
if (rc->extents_found == 0)
break;
- printk(KERN_INFO "btrfs: found %llu extents\n",
+ btrfs_info(extent_root->fs_info, "found %llu extents",
rc->extents_found);
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
@@ -4285,11 +4283,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
}
}
- filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
- rc->block_group->key.objectid,
- rc->block_group->key.objectid +
- rc->block_group->key.offset - 1);
-
WARN_ON(rc->block_group->pinned > 0);
WARN_ON(rc->block_group->reserved > 0);
WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ec71ea44d2b4..1389b69059de 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -44,7 +44,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
if (!need_reset && btrfs_root_generation(item)
!= btrfs_root_generation_v2(item)) {
if (btrfs_root_generation_v2(item) != 0) {
- printk(KERN_WARNING "btrfs: mismatching "
+ printk(KERN_WARNING "BTRFS: mismatching "
"generation and generation_v2 "
"found in root item. This root "
"was probably mounted with an "
@@ -154,7 +154,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
if (ret != 0) {
btrfs_print_leaf(root, path->nodes[0]);
- printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
+ btrfs_crit(root->fs_info, "unable to update root key %llu %u %llu",
key->objectid, key->type, key->offset);
BUG_ON(1);
}
@@ -400,21 +400,6 @@ out:
return err;
}
-int btrfs_find_root_ref(struct btrfs_root *tree_root,
- struct btrfs_path *path,
- u64 root_id, u64 ref_id)
-{
- struct btrfs_key key;
- int ret;
-
- key.objectid = root_id;
- key.type = BTRFS_ROOT_REF_KEY;
- key.offset = ref_id;
-
- ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
- return ret;
-}
-
/*
* add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
* or BTRFS_ROOT_BACKREF_KEY.
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 1fd3f33c330a..efba5d1282ee 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -256,6 +256,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
+static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -269,6 +271,29 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
wake_up(&sctx->list_wait);
}
+static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+ while (atomic_read(&fs_info->scrub_pause_req)) {
+ mutex_unlock(&fs_info->scrub_lock);
+ wait_event(fs_info->scrub_pause_wait,
+ atomic_read(&fs_info->scrub_pause_req) == 0);
+ mutex_lock(&fs_info->scrub_lock);
+ }
+}
+
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+ atomic_inc(&fs_info->scrubs_paused);
+ wake_up(&fs_info->scrub_pause_wait);
+
+ mutex_lock(&fs_info->scrub_lock);
+ __scrub_blocked_if_needed(fs_info);
+ atomic_dec(&fs_info->scrubs_paused);
+ mutex_unlock(&fs_info->scrub_lock);
+
+ wake_up(&fs_info->scrub_pause_wait);
+}
+
/*
* used for workers that require transaction commits (i.e., for the
* NOCOW case)
@@ -480,7 +505,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
* hold all of the paths here
*/
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
- printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+ printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
"%s, sector %llu, root %llu, inode %llu, offset %llu, "
"length %llu, links %u (path: %s)\n", swarn->errstr,
swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -492,7 +517,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
return 0;
err:
- printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+ printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
"%s, sector %llu, root %llu, inode %llu, offset %llu: path "
"resolving failed with ret=%d\n", swarn->errstr,
swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -555,7 +580,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
&ref_root, &ref_level);
printk_in_rcu(KERN_WARNING
- "btrfs: %s at logical %llu on dev %s, "
+ "BTRFS: %s at logical %llu on dev %s, "
"sector %llu: metadata %s (level %d) in tree "
"%llu\n", errstr, swarn.logical,
rcu_str_deref(dev->name),
@@ -704,13 +729,11 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
struct scrub_fixup_nodatasum *fixup;
struct scrub_ctx *sctx;
struct btrfs_trans_handle *trans = NULL;
- struct btrfs_fs_info *fs_info;
struct btrfs_path *path;
int uncorrectable = 0;
fixup = container_of(work, struct scrub_fixup_nodatasum, work);
sctx = fixup->sctx;
- fs_info = fixup->root->fs_info;
path = btrfs_alloc_path();
if (!path) {
@@ -759,8 +782,8 @@ out:
btrfs_dev_replace_stats_inc(
&sctx->dev_root->fs_info->dev_replace.
num_uncorrectable_read_errors);
- printk_ratelimited_in_rcu(KERN_ERR
- "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
+ printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
+ "unable to fixup (nodatasum) error at logical %llu on dev %s\n",
fixup->logical, rcu_str_deref(fixup->dev->name));
}
@@ -1161,7 +1184,7 @@ corrected_error:
sctx->stat.corrected_errors++;
spin_unlock(&sctx->stat_lock);
printk_ratelimited_in_rcu(KERN_ERR
- "btrfs: fixed up error at logical %llu on dev %s\n",
+ "BTRFS: fixed up error at logical %llu on dev %s\n",
logical, rcu_str_deref(dev->name));
}
} else {
@@ -1170,7 +1193,7 @@ did_not_correct_error:
sctx->stat.uncorrectable_errors++;
spin_unlock(&sctx->stat_lock);
printk_ratelimited_in_rcu(KERN_ERR
- "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
+ "BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
logical, rcu_str_deref(dev->name));
}
@@ -1308,7 +1331,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
continue;
}
bio->bi_bdev = page->dev->bdev;
- bio->bi_sector = page->physical >> 9;
+ bio->bi_iter.bi_sector = page->physical >> 9;
bio_add_page(bio, page->page, PAGE_SIZE, 0);
if (btrfsic_submit_bio_wait(READ, bio))
@@ -1418,8 +1441,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
int ret;
if (!page_bad->dev->bdev) {
- printk_ratelimited(KERN_WARNING
- "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
+ printk_ratelimited(KERN_WARNING "BTRFS: "
+ "scrub_repair_page_from_good_copy(bdev == NULL) "
+ "is unexpected!\n");
return -EIO;
}
@@ -1427,7 +1451,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
if (!bio)
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
- bio->bi_sector = page_bad->physical >> 9;
+ bio->bi_iter.bi_sector = page_bad->physical >> 9;
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
@@ -1520,7 +1544,7 @@ again:
bio->bi_private = sbio;
bio->bi_end_io = scrub_wr_bio_end_io;
bio->bi_bdev = sbio->dev->bdev;
- bio->bi_sector = sbio->physical >> 9;
+ bio->bi_iter.bi_sector = sbio->physical >> 9;
sbio->err = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
@@ -1877,7 +1901,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
* This case is handled correctly (but _very_ slowly).
*/
printk_ratelimited(KERN_WARNING
- "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
+ "BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
bio_endio(sbio->bio, -EIO);
} else {
btrfsic_submit_bio(READ, sbio->bio);
@@ -1926,7 +1950,7 @@ again:
bio->bi_private = sbio;
bio->bi_end_io = scrub_bio_end_io;
bio->bi_bdev = sbio->dev->bdev;
- bio->bi_sector = sbio->physical >> 9;
+ bio->bi_iter.bi_sector = sbio->physical >> 9;
sbio->err = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
@@ -2286,8 +2310,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
- atomic_inc(&fs_info->scrubs_paused);
- wake_up(&fs_info->scrub_pause_wait);
+ scrub_blocked_if_needed(fs_info);
/* FIXME it might be better to start readahead at commit root */
key_start.objectid = logical;
@@ -2311,16 +2334,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
if (!IS_ERR(reada2))
btrfs_reada_wait(reada2);
- mutex_lock(&fs_info->scrub_lock);
- while (atomic_read(&fs_info->scrub_pause_req)) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- atomic_read(&fs_info->scrub_pause_req) == 0);
- mutex_lock(&fs_info->scrub_lock);
- }
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- wake_up(&fs_info->scrub_pause_wait);
/*
* collect all data csums for the stripe to avoid seeking during
@@ -2357,22 +2370,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
- atomic_inc(&fs_info->scrubs_paused);
- wake_up(&fs_info->scrub_pause_wait);
- mutex_lock(&fs_info->scrub_lock);
- while (atomic_read(&fs_info->scrub_pause_req)) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- atomic_read(&fs_info->scrub_pause_req) == 0);
- mutex_lock(&fs_info->scrub_lock);
- }
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- wake_up(&fs_info->scrub_pause_wait);
+ scrub_blocked_if_needed(fs_info);
}
+ if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ else
+ key.type = BTRFS_EXTENT_ITEM_KEY;
key.objectid = logical;
- key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -2380,8 +2385,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
goto out;
if (ret > 0) {
- ret = btrfs_previous_item(root, path, 0,
- BTRFS_EXTENT_ITEM_KEY);
+ ret = btrfs_previous_extent_item(root, path, 0);
if (ret < 0)
goto out;
if (ret > 0) {
@@ -2439,9 +2443,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
if (key.objectid < logical &&
(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
- printk(KERN_ERR
- "btrfs scrub: tree block %llu spanning "
- "stripes, ignored. logical=%llu\n",
+ btrfs_err(fs_info,
+ "scrub: tree block %llu spanning "
+ "stripes, ignored. logical=%llu",
key.objectid, logical);
goto next;
}
@@ -2683,21 +2687,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
- atomic_inc(&fs_info->scrubs_paused);
- wake_up(&fs_info->scrub_pause_wait);
wait_event(sctx->list_wait,
atomic_read(&sctx->workers_pending) == 0);
-
- mutex_lock(&fs_info->scrub_lock);
- while (atomic_read(&fs_info->scrub_pause_req)) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- atomic_read(&fs_info->scrub_pause_req) == 0);
- mutex_lock(&fs_info->scrub_lock);
- }
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- wake_up(&fs_info->scrub_pause_wait);
+ scrub_blocked_if_needed(fs_info);
btrfs_put_block_group(cache);
if (ret)
@@ -2823,8 +2815,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
* check some assumptions
*/
if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
- printk(KERN_ERR
- "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
+ btrfs_err(fs_info,
+ "scrub: size assumption nodesize == leafsize (%d == %d) fails",
fs_info->chunk_root->nodesize,
fs_info->chunk_root->leafsize);
return -EINVAL;
@@ -2836,16 +2828,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
* the way scrub is implemented. Do not handle this
* situation at all because it won't ever happen.
*/
- printk(KERN_ERR
- "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
+ btrfs_err(fs_info,
+ "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
return -EINVAL;
}
if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
/* not supported for data w/o checksums */
- printk(KERN_ERR
- "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
+ btrfs_err(fs_info,
+ "scrub: size assumption sectorsize != PAGE_SIZE "
+ "(%d != %lu) fails",
fs_info->chunk_root->sectorsize, PAGE_SIZE);
return -EINVAL;
}
@@ -2858,7 +2851,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
* would exhaust the array bounds of pagev member in
* struct scrub_block
*/
- pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
+ btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize "
+ "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
fs_info->chunk_root->nodesize,
SCRUB_MAX_PAGES_PER_BLOCK,
fs_info->chunk_root->sectorsize,
@@ -2908,7 +2902,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
}
sctx->readonly = readonly;
dev->scrub_device = sctx;
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ /*
+ * by checking @scrub_pause_req here, we avoid a race
+ * between committing a transaction and scrubbing.
+ */
+ __scrub_blocked_if_needed(fs_info);
atomic_inc(&fs_info->scrubs_running);
mutex_unlock(&fs_info->scrub_lock);
@@ -2917,9 +2917,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
* by holding device list mutex, we can
* kick off writing super in log tree sync.
*/
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
ret = scrub_supers(sctx, dev);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
}
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (!ret)
ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -3167,7 +3168,8 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
ret = iterate_inodes_from_logical(logical, fs_info, path,
record_inode_for_nocow, nocow_ctx);
if (ret != 0 && ret != -ENOENT) {
- pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
+ btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, "
+ "phys %llu, len %llu, mir %u, ret %d",
logical, physical_for_dev_replace, len, mirror_num,
ret);
not_written = 1;
@@ -3289,7 +3291,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
again:
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) {
- pr_err("find_or_create_page() failed\n");
+ btrfs_err(fs_info, "find_or_create_page() failed");
ret = -ENOMEM;
goto out;
}
@@ -3361,7 +3363,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
return -EIO;
if (!dev->bdev) {
printk_ratelimited(KERN_WARNING
- "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
+ "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
return -EIO;
}
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
@@ -3371,8 +3373,8 @@ static int write_page_nocow(struct scrub_ctx *sctx,
spin_unlock(&sctx->stat_lock);
return -ENOMEM;
}
- bio->bi_size = 0;
- bio->bi_sector = physical_for_dev_replace >> 9;
+ bio->bi_iter.bi_size = 0;
+ bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
if (ret != PAGE_CACHE_SIZE) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 945d1db98f26..9dde9717c1b9 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -24,12 +24,12 @@
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
-#include <linux/crc32c.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include "send.h"
#include "backref.h"
+#include "hash.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
@@ -88,8 +88,6 @@ struct send_ctx {
u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
- struct vfsmount *mnt;
-
struct btrfs_root *send_root;
struct btrfs_root *parent_root;
struct clone_root *clone_roots;
@@ -111,6 +109,7 @@ struct send_ctx {
int cur_inode_deleted;
u64 cur_inode_size;
u64 cur_inode_mode;
+ u64 cur_inode_last_extent;
u64 send_progress;
@@ -122,6 +121,74 @@ struct send_ctx {
int name_cache_size;
char *read_buf;
+
+ /*
+ * We process inodes by their increasing order, so if before an
+ * incremental send we reverse the parent/child relationship of
+ * directories such that a directory with a lower inode number was
+ * the parent of a directory with a higher inode number, and the one
+ * becoming the new parent got renamed too, we can't rename/move the
+ * directory with lower inode number when we finish processing it - we
+ * must process the directory with higher inode number first, then
+ * rename/move it and then rename/move the directory with lower inode
+ * number. Example follows.
+ *
+ * Tree state when the first send was performed:
+ *
+ * .
+ * |-- a (ino 257)
+ * |-- b (ino 258)
+ * |
+ * |
+ * |-- c (ino 259)
+ * | |-- d (ino 260)
+ * |
+ * |-- c2 (ino 261)
+ *
+ * Tree state when the second (incremental) send is performed:
+ *
+ * .
+ * |-- a (ino 257)
+ * |-- b (ino 258)
+ * |-- c2 (ino 261)
+ * |-- d2 (ino 260)
+ * |-- cc (ino 259)
+ *
+ * The sequence of steps that led to the second state was:
+ *
+ * mv /a/b/c/d /a/b/c2/d2
+ * mv /a/b/c /a/b/c2/d2/cc
+ *
+ * "c" has lower inode number, but we can't move it (2nd mv operation)
+ * before we move "d", which has higher inode number.
+ *
+ * So we just memorize which move/rename operations must be performed
+ * later when their respective parent is processed and moved/renamed.
+ */
+
+ /* Indexed by parent directory inode number. */
+ struct rb_root pending_dir_moves;
+
+ /*
+ * Reverse index, indexed by the inode number of a directory that
+ * is waiting for the move/rename of its immediate parent before its
+ * own move/rename can be performed.
+ */
+ struct rb_root waiting_dir_moves;
+};
+
+struct pending_dir_move {
+ struct rb_node node;
+ struct list_head list;
+ u64 parent_ino;
+ u64 ino;
+ u64 gen;
+ struct list_head update_refs;
+};
+
+struct waiting_dir_move {
+ struct rb_node node;
+ u64 ino;
};
struct name_cache_entry {
@@ -145,6 +212,15 @@ struct name_cache_entry {
char name[];
};
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
+
+static int need_send_hole(struct send_ctx *sctx)
+{
+ return (sctx->parent_root && !sctx->cur_inode_new &&
+ !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
+ S_ISREG(sctx->cur_inode_mode));
+}
+
static void fs_path_reset(struct fs_path *p)
{
if (p->reversed) {
@@ -336,16 +412,6 @@ out:
return ret;
}
-#if 0
-static void fs_path_remove(struct fs_path *p)
-{
- BUG_ON(p->reversed);
- while (p->start != p->end && *p->end != '/')
- p->end--;
- *p->end = 0;
-}
-#endif
-
static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
int ret;
@@ -436,30 +502,15 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
return 0;
}
-#if 0
-static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
-{
- return tlv_put(sctx, attr, &value, sizeof(value));
-}
-
-static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
-{
- __le16 tmp = cpu_to_le16(value);
- return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-
-static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
-{
- __le32 tmp = cpu_to_le32(value);
- return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-#endif
+#define TLV_PUT_DEFINE_INT(bits) \
+ static int tlv_put_u##bits(struct send_ctx *sctx, \
+ u##bits attr, u##bits value) \
+ { \
+ __le##bits __tmp = cpu_to_le##bits(value); \
+ return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
+ }
-static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
-{
- __le64 tmp = cpu_to_le64(value);
- return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
+TLV_PUT_DEFINE_INT(64)
static int tlv_put_string(struct send_ctx *sctx, u16 attr,
const char *str, int len)
@@ -475,17 +526,6 @@ static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}
-#if 0
-static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
- struct timespec *ts)
-{
- struct btrfs_timespec bts;
- bts.sec = cpu_to_le64(ts->tv_sec);
- bts.nsec = cpu_to_le32(ts->tv_nsec);
- return tlv_put(sctx, attr, &bts, sizeof(bts));
-}
-#endif
-
static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
struct extent_buffer *eb,
struct btrfs_timespec *ts)
@@ -533,12 +573,6 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
if (ret < 0) \
goto tlv_put_failure; \
} while (0)
-#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
- do { \
- ret = tlv_put_timespec(sctx, attrtype, ts); \
- if (ret < 0) \
- goto tlv_put_failure; \
- } while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
do { \
ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
@@ -586,7 +620,7 @@ static int send_cmd(struct send_ctx *sctx)
hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
hdr->crc = 0;
- crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
+ crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
hdr->crc = cpu_to_le32(crc);
ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
@@ -1270,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx,
if (!backref_ctx->found_itself) {
/* found a bug in backref code? */
ret = -EIO;
- printk(KERN_ERR "btrfs: ERROR did not find backref in "
+ btrfs_err(sctx->send_root->fs_info, "did not find backref in "
"send_root. inode=%llu, offset=%llu, "
"disk_byte=%llu found extent=%llu\n",
ino, data_offset, disk_byte, found_key.objectid);
@@ -1298,6 +1332,16 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
}
if (cur_clone_root) {
+ if (compressed != BTRFS_COMPRESS_NONE) {
+ /*
+ * Offsets given by iterate_extent_inodes() are relative
+ * to the start of the extent, we need to add logical
+ * offset from the file extent item.
+ * (See why at backref.c:check_extent_in_eb())
+ */
+ cur_clone_root->offset += btrfs_file_extent_offset(eb,
+ fi);
+ }
*found = cur_clone_root;
ret = 0;
} else {
@@ -1343,7 +1387,7 @@ static int read_symlink(struct btrfs_root *root,
BUG_ON(compression);
off = btrfs_file_extent_inline_start(ei);
- len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+ len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
@@ -1372,7 +1416,7 @@ static int gen_unique_name(struct send_ctx *sctx,
return -ENOMEM;
while (1) {
- len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
+ len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
ino, gen, idx);
if (len >= sizeof(tmp)) {
/* should really not happen */
@@ -1933,6 +1977,7 @@ static void name_cache_free(struct send_ctx *sctx)
*/
static int __get_cur_name_and_parent(struct send_ctx *sctx,
u64 ino, u64 gen,
+ int skip_name_cache,
u64 *parent_ino,
u64 *parent_gen,
struct fs_path *dest)
@@ -1942,6 +1987,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
struct btrfs_path *path = NULL;
struct name_cache_entry *nce = NULL;
+ if (skip_name_cache)
+ goto get_ref;
/*
* First check if we already did a call to this function with the same
* ino/gen. If yes, check if the cache entry is still up-to-date. If yes
@@ -1986,11 +2033,12 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
goto out_cache;
}
+get_ref:
/*
* Depending on whether the inode was already processed or not, use
* send_root or parent_root for ref lookup.
*/
- if (ino < sctx->send_progress)
+ if (ino < sctx->send_progress && !skip_name_cache)
ret = get_first_ref(sctx->send_root, ino,
parent_ino, parent_gen, dest);
else
@@ -2014,6 +2062,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
goto out;
ret = 1;
}
+ if (skip_name_cache)
+ goto out;
out_cache:
/*
@@ -2081,6 +2131,9 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
u64 parent_inode = 0;
u64 parent_gen = 0;
int stop = 0;
+ u64 start_ino = ino;
+ u64 start_gen = gen;
+ int skip_name_cache = 0;
name = fs_path_alloc();
if (!name) {
@@ -2088,19 +2141,32 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
goto out;
}
+ if (is_waiting_for_move(sctx, ino))
+ skip_name_cache = 1;
+
+again:
dest->reversed = 1;
fs_path_reset(dest);
while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
fs_path_reset(name);
- ret = __get_cur_name_and_parent(sctx, ino, gen,
+ ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
&parent_inode, &parent_gen, name);
if (ret < 0)
goto out;
if (ret)
stop = 1;
+ if (!skip_name_cache &&
+ is_waiting_for_move(sctx, parent_inode)) {
+ ino = start_ino;
+ gen = start_gen;
+ stop = 0;
+ skip_name_cache = 1;
+ goto again;
+ }
+
ret = fs_path_add_path(dest, name);
if (ret < 0)
goto out;
@@ -2131,7 +2197,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
char *name = NULL;
int namelen;
- path = alloc_path_for_send();
+ path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -2180,12 +2246,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
sctx->send_root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
- sctx->send_root->root_item.ctransid);
+ le64_to_cpu(sctx->send_root->root_item.ctransid));
if (parent_root) {
TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
sctx->parent_root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
- sctx->parent_root->root_item.ctransid);
+ le64_to_cpu(sctx->parent_root->root_item.ctransid));
}
ret = send_cmd(sctx);
@@ -2672,10 +2738,347 @@ out:
return ret;
}
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) /* Returns 1 if an entry with this ino exists in sctx->waiting_dir_moves, 0 otherwise. */
+{
+ struct rb_node *n = sctx->waiting_dir_moves.rb_node; /* start the search at the tree root */
+ struct waiting_dir_move *entry;
+
+ while (n) {
+ entry = rb_entry(n, struct waiting_dir_move, node);
+ if (ino < entry->ino) /* tree is keyed by entry->ino: smaller keys are to the left */
+ n = n->rb_left;
+ else if (ino > entry->ino)
+ n = n->rb_right;
+ else
+ return 1; /* exact match found */
+ }
+ return 0; /* fell off the tree without finding a match */
+}
+
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) /* Inserts a new entry keyed by ino into sctx->waiting_dir_moves. Returns 0 on success, -ENOMEM if allocation fails, -EEXIST if ino is already present. */
+{
+ struct rb_node **p = &sctx->waiting_dir_moves.rb_node; /* link slot where the new node will be attached */
+ struct rb_node *parent = NULL;
+ struct waiting_dir_move *entry, *dm;
+
+ dm = kmalloc(sizeof(*dm), GFP_NOFS); /* allocated before the search; released below if the key turns out to be a duplicate */
+ if (!dm)
+ return -ENOMEM;
+ dm->ino = ino;
+
+ while (*p) { /* descend until an empty link slot is reached */
+ parent = *p;
+ entry = rb_entry(parent, struct waiting_dir_move, node);
+ if (ino < entry->ino) {
+ p = &(*p)->rb_left;
+ } else if (ino > entry->ino) {
+ p = &(*p)->rb_right;
+ } else {
+ kfree(dm); /* duplicate key: drop the unused allocation so it does not leak */
+ return -EEXIST;
+ }
+ }
+
+ rb_link_node(&dm->node, parent, p); /* attach the new node under parent at the slot found above */
+ rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
+ return 0;
+}
+
+static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) /* Removes and frees the entry keyed by ino from sctx->waiting_dir_moves. Returns 0 if an entry was removed, -ENOENT if none matched. */
+{
+ struct rb_node *n = sctx->waiting_dir_moves.rb_node; /* start the search at the tree root */
+ struct waiting_dir_move *entry;
+
+ while (n) {
+ entry = rb_entry(n, struct waiting_dir_move, node);
+ if (ino < entry->ino) {
+ n = n->rb_left;
+ } else if (ino > entry->ino) {
+ n = n->rb_right;
+ } else {
+ rb_erase(&entry->node, &sctx->waiting_dir_moves); /* unlink from the tree before freeing the entry */
+ kfree(entry);
+ return 0;
+ }
+ }
+ return -ENOENT; /* no entry with this ino in the tree */
+}
+
+static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
+{
+ struct rb_node **p = &sctx->pending_dir_moves.rb_node;
+ struct rb_node *parent = NULL;
+ struct pending_dir_move *entry, *pm;
+ struct recorded_ref *cur;
+ int exists = 0;
+ int ret;
+
+ pm = kmalloc(sizeof(*pm), GFP_NOFS);
+ if (!pm)
+ return -ENOMEM;
+ pm->parent_ino = parent_ino;
+ pm->ino = sctx->cur_ino;
+ pm->gen = sctx->cur_inode_gen;
+ INIT_LIST_HEAD(&pm->list);
+ INIT_LIST_HEAD(&pm->update_refs);
+ RB_CLEAR_NODE(&pm->node);
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct pending_dir_move, node);
+ if (parent_ino < entry->parent_ino) {
+ p = &(*p)->rb_left;
+ } else if (parent_ino > entry->parent_ino) {
+ p = &(*p)->rb_right;
+ } else {