#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;

static LIST_HEAD(buffers);
static LIST_HEAD(states);

#define LEAK_DEBUG 0
#if LEAK_DEBUG
static DEFINE_SPINLOCK(leak_lock);
#endif

#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;

	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use a WRITE_SYNC */
	unsigned int sync_io:1;
};

int __init extent_io_init(void)
{
	extent_state_cache = kmem_cache_create("extent_state",
			sizeof(struct extent_state), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	extent_buffer_cache = kmem_cache_create("extent_buffers",
			sizeof(struct extent_buffer), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;
	return 0;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	return -ENOMEM;
}

void extent_io_exit(void)
{
	struct extent_state *state;
	struct extent_buffer *eb;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
		       "state %lu in tree %p refs %d\n",
		       (unsigned long long)state->start,
		       (unsigned long long)state->end,
		       state->state, state->tree, atomic_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);

	}

	while (!list_empty(&buffers)) {
		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
		       "refs %d\n", (unsigned long long)eb->start,
		       eb->len, atomic_read(&eb->refs));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

void extent_io_tree_init(struct extent_io_tree *tree,
			 struct address_space *mapping, gfp_t mask)
{
	tree->state = RB_ROOT;
	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	spin_lock_init(&tree->buffer_lock);
	tree->mapping = mapping;
}

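/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * file): a caller that owns an inode would typically embed an
 * extent_io_tree next to the inode and initialize it against the inode's
 * page cache mapping, e.g.
 *
 *	extent_io_tree_init(&io_tree, inode->i_mapping, GFP_NOFS);
 *
 * where 'io_tree' and 'inode' are the caller's own variables.
 */
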
static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
#if LEAK_DEBUG
	unsigned long flags;
#endif

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->private = 0;
	state->tree = NULL;
#if LEAK_DEBUG
	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);
#endif
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
#if LEAK_DEBUG
		unsigned long flags;
#endif
		WARN_ON(state->tree);
#if LEAK_DEBUG
		spin_lock_irqsave(&leak_lock, flags);
		list_del(&state->leak_list);
		spin_unlock_irqrestore(&leak_lock, flags);
#endif
		kmem_cache_free(extent_state_cache, state);
	}
}

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **prev_ret,
				      struct rb_node **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}

	if (prev_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
		prev = orig_prev;
	}

	if (next_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
	}
	return NULL;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	struct rb_node *prev = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &prev, NULL);
	if (!ret)
		return prev;
	return ret;
}

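/*
 * Illustrative note (added for exposition): tree_search() returns the node
 * covering 'offset' if one exists, otherwise the first node that ends at or
 * after 'offset'.  For example, with states [0, 4095] and [8192, 12287] in
 * the tree, a search for offset 5000 returns the [8192, 12287] node, because
 * __etree_search() advances prev with rb_next() until prev_entry->end is no
 * longer below the offset.
 */
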
static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
		     struct extent_state *other)
{
	if (tree->ops && tree->ops->merge_extent_hook)
		tree->ops->merge_extent_hook(tree->mapping->host, new,
					     other);
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			merge_cb(tree, state, other);
			state->start = other->start;
			other->tree = NULL;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			merge_cb(tree, state, other);
			other->start = state->start;
			state->tree = NULL;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
			state = NULL;
		}
	}

	return 0;
}

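/*
 * Worked example (added for exposition): if the tree holds two states
 * [0, 4095] and [4096, 8191] that end up with identical state bits and
 * neither has EXTENT_IOBITS or EXTENT_BOUNDARY set, merge_state() on either
 * of them collapses the pair into a single [0, 8191] state and frees the
 * other struct.
 */
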
static int set_state_cb(struct extent_io_tree *tree,
			struct extent_state *state, int *bits)
{
	if (tree->ops && tree->ops->set_bit_hook) {
		return tree->ops->set_bit_hook(tree->mapping->host,
					       state, bits);
	}

	return 0;
}

static void clear_state_cb(struct extent_io_tree *tree,
			   struct extent_state *state, int *bits)
{
	if (tree->ops && tree->ops->clear_bit_hook)
		tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int *bits)
{
	struct rb_node *node;
	int bits_to_set = *bits & ~EXTENT_CTLBITS;
	int ret;

	if (end < start) {
		printk(KERN_ERR "btrfs end < start %llu %llu\n",
		       (unsigned long long)end,
		       (unsigned long long)start);
		WARN_ON(1);
	}
	state->start = start;
	state->end = end;
	ret = set_state_cb(tree, state, bits);
	if (ret)
		return ret;

	if (bits_to_set & EXTENT_DIRTY)
		tree->dirty_bytes += end - start + 1;
	state->state |= bits_to_set;
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk(KERN_ERR "btrfs found node %llu %llu on insert of "
		       "%llu %llu\n", (unsigned long long)found->start,
		       (unsigned long long)found->end,
		       (unsigned long long)start, (unsigned long long)end);
		free_extent_state(state);
		return -EEXIST;
	}
	state->tree = tree;
	merge_state(tree, state);
	return 0;
}

static int split_cb(struct extent_io_tree *tree, struct extent_state *orig,
		    u64 split)
{
	if (tree->ops && tree->ops->split_extent_hook)
		return tree->ops->split_extent_hook(tree->mapping->host,
						    orig, split);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;

	split_cb(tree, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		free_extent_state(prealloc);
		return -EEXIST;
	}
	prealloc->tree = tree;
	return 0;
}

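/*
 * Worked example (added for exposition): splitting a state that covers
 * [0, 8191] at split == 4096 leaves 'orig' covering [4096, 8191] and inserts
 * 'prealloc' covering [0, 4095], both carrying the same state bits.
 */
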
/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state,
			   int *bits, int wake)
{
	int bits_to_clear = *bits & ~EXTENT_CTLBITS;
	int ret = state->state & bits_to_clear;

	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}
	clear_state_cb(tree, state, bits);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		if (state->tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->tree = NULL;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

static struct extent_state *
alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete,
		     struct extent_state **cached_state,
		     gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	struct rb_node *next_node;
	struct rb_node *node;
	u64 last_end;
	int err;
	int set = 0;
	int clear = 0;

	if (delete)
		bits |= ~EXTENT_CTLBITS;
	bits |= EXTENT_FIRST_DELALLOC;

	if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		BUG_ON(!prealloc);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && cached->tree && cached->start == start) {
			if (clear)
				atomic_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set |= clear_state_bit(tree, state, &bits, wake);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);
		if (wake)
			wake_up(&state->wq);

		set |= clear_state_bit(tree, prealloc, &bits, wake);

		prealloc = NULL;
		goto out;
	}

	if (state->end < end && prealloc && !need_resched())
		next_node = rb_next(&state->rb_node);
	else
		next_node = NULL;

	set |= clear_state_bit(tree, state, &bits, wake);
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && next_node) {
		state = rb_entry(next_node, struct extent_state,
				 rb_node);
		if (state->start == start)
			goto hit_next;
	}
	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}

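/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * file): dropping the dirty bit on a byte range, without waking waiters or
 * deleting the states outright, looks like
 *
 *	clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, NULL, GFP_NOFS);
 *
 * which is essentially what the clear_extent_dirty()-style wrappers further
 * down expand to.
 */
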
static int wait_on_state(struct extent_io_tree *tree,
			 struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			spin_unlock(&tree->lock);
			cond_resched();
			spin_lock(&tree->lock);
		}
	}
out:
	spin_unlock(&tree->lock);
	return 0;
}

static int set_state_bits(struct extent_io_tree *tree,
			  struct extent_state *state,
			  int *bits)
{
	int ret;
	int bits_to_set = *bits & ~EXTENT_CTLBITS;

	ret = set_state_cb(tree, state, bits);
	if (ret)
		return ret;
	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	state->state |= bits_to_set;

	return 0;
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
			*cached_ptr = state;
			atomic_inc(&state->refs);
		}
	}
}

static void uncache_state(struct extent_state **cached_ptr)
{
	if (cached_ptr && (*cached_ptr)) {
		struct extent_state *state = *cached_ptr;
		*cached_ptr = NULL;
		free_extent_state(state);
	}
}

/*
 * set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set.  The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   int bits, int exclusive_bits, u64 *failed_start,
		   struct extent_state **cached_state, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	u64 last_start;
	u64 last_end;

	bits |= EXTENT_FIRST_DELALLOC;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		BUG_ON(!prealloc);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start == start && state->tree) {
			node = &state->rb_node;
			goto hit_next;
		}
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = insert_state(tree, prealloc, start, end, &bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		struct rb_node *next_node;
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}

		err = set_state_bits(tree, state, &bits);
		if (err)
			goto out;

		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;

		start = last_end + 1;
		if (start < end && prealloc && !need_resched()) {
			next_node = rb_next(node);
			if (next_node) {
				state = rb_entry(next_node, struct extent_state,
						 rb_node);
				if (state->start == start)
					goto hit_next;
			}
		}
		goto search_again;
	}

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			err = set_state_bits(tree, state, &bits);
			if (err)
				goto out;
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = insert_state(tree, prealloc, start, this_end,
				   &bits);
		BUG_ON(err == -EEXIST);
		if (err) {
			prealloc = NULL;
			goto out;
		}
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		err = set_state_bits(tree, prealloc, &bits);
		if (err) {
			prealloc = NULL;
			goto out;
		}
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}

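/*
 * Illustrative note (added for exposition): passing a non-zero
 * 'exclusive_bits' mask turns set_extent_bit() into a conditional set; if
 * any state in the range already carries one of those bits, the call fails
 * with -EEXIST and *failed_start reports where the conflict begins.  The
 * locking helpers further down use exactly this to build lock_extent() and
 * try_lock_extent() on top of EXTENT_LOCKED.
 */
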
/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      NULL, mask);
}

int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		    int bits, gfp_t mask)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL,
			      NULL, mask);
}

int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		      int bits, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
}

int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
			struct extent_state **cached_state, gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
			      0, NULL, cached_state, mask);
}

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC |
				EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
}

int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      NULL, mask);
}

static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
			    gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
				NULL, mask);
}

int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			struct extent_state **cached_state, gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
			      NULL, cached_state, mask);
}

static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				 u64 end, struct extent_state **cached_state,
				 gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
				cached_state, mask);
}

int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}

/*
 * either insert or lock the state struct between start and end.  Use mask
 * to tell us if waiting is desired.
 */
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		     int bits, struct extent_state **cached_state, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
				     EXTENT_LOCKED, &failed_start,
				     cached_state, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}

int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	return lock_extent_bits(tree, start, end, 0, NULL, mask);
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		    gfp_t mask)
{
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
			     &failed_start, NULL, mask);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, NULL, mask);
		return 0;
	}
	return 1;
}

int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
			 struct extent_state **cached, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
				mask);
}

int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
				mask);
}

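/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * file): the usual pattern built from the helpers above is
 *
 *	lock_extent(tree, start, end, GFP_NOFS);
 *	... do work on the range ...
 *	unlock_extent(tree, start, end, GFP_NOFS);
 *
 * with try_lock_extent() available when the caller would rather back off
 * than wait for a conflicting lock.
 */
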
/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	return 0;
}

/*
 * helper function to set both pages and extents in the tree writeback
 */
static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	return 0;
}

/*
 * find the first offset in the io tree with 'bits' set. zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned, < 0 on error
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/* find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will be returned if
 * nothing was found after 'start'
 */
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
						 u64 start, int bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits))
			return state;

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * 1 is returned if we find something, 0 if nothing was in the tree
 */
static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
					u64 *start, u64 *end, u64 max_bytes,
					struct extent_state **cached_state)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 found = 0;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node) {
		if (!found)
			*end = (u64)-1;
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			atomic_inc(&state->refs);
		}
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

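/*
 * Illustrative note (added for exposition): find_delalloc_range() only walks
 * the state tree.  With adjacent delalloc states [4096, 16383] and
 * [16384, 20479] recorded, a search starting at *start == 8192 returns
 * *start == 4096 and *end == 20479 (stopping earlier if an EXTENT_BOUNDARY
 * state or the max_bytes budget is hit) and pins the first state found in
 * *cached_state.
 */
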
static noinline int __unlock_for_delalloc(struct inode *inode,
					  struct page *locked_page,
					  u64 start, u64 end)
{
	int ret;
	struct page *pages[16];
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;
	int i;

	if (index == locked_page->index && end_index == index)
		return 0;

	while (nr_pages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long, nr_pages,
				     ARRAY_SIZE(pages)), pages);
		for (i = 0; i < ret; i++) {
			if (pages[i] != locked_page)
				unlock_page(pages[i]);
			page_cache_release(pages[i]);
		}
		nr_pages -= ret;
		index += ret;
		cond_resched();
	}
	return 0;
}

static noinline int lock_delalloc_pages(struct inode *inode,
					struct page *locked_page,
					u64 delalloc_start,
					u64 delalloc_end)
{
	unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
	unsigned long start_index = index;
	unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
	unsigned long pages_locked = 0;
	struct page *pages[16];
	unsigned long nrpages;
	int ret;
	int i;

	/* the caller is responsible for locking the start index */
	if (index == locked_page->index && index == end_index)
		return 0;

	/* skip the page at the start index */
	nrpages = end_index - index + 1;
	while (nrpages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long,
				     nrpages, ARRAY_SIZE(pages)), pages);
		if (ret == 0) {
			ret = -EAGAIN;
			goto done;
		}
		/* now we have an array of pages, lock them all */
		for (i = 0; i < ret; i++) {
			/*
			 * the caller is taking responsibility for
			 * locked_page
			 */
			if (pages[i] != locked_page) {
				lock_page(pages[i]);
				if (!PageDirty(pages[i]) ||
				    pages[i]->mapping != inode->i_mapping) {
					ret = -EAGAIN;
					unlock_page(pages[i]);
					page_cache_release(pages[i]);
					goto done;
				}
			}
			page_cache_release(pages[i]);
			pages_locked++;
		}
		nrpages -= ret;
		index += ret;
		cond_resched();
	}
	ret = 0;
done:
	if (ret && pages_locked) {
		__unlock_for_delalloc(inode, locked_page,
			      delalloc_start,
			      ((u64)(start_index + pages_locked - 1)) <<
			      PAGE_CACHE_SHIFT);
	}
	return ret;
}

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * 1 is returned if we find something, 0 if nothing was in the tree
 */
static noinline u64 find_lock_delalloc_range(struct inode *inode,
					     struct extent_io_tree *tree,
					     struct page *locked_page,
					     u64 *start, u64 *end,
					     u64 max_bytes)
{
	u64 delalloc_start;
	u64 delalloc_end;
	u64 found;
	struct extent_state *cached_state = NULL;
	int ret;
	int loops = 0;

again:
	/* step one, find a bunch of delalloc bytes starting at start */
	delalloc_start = *start;
	delalloc_end = 0;
	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
				    max_bytes, &cached_state);
	if (!found || delalloc_end <= *start) {
		*start = delalloc_start;
		*end = delalloc_end;
		free_extent_state(cached_state);
		return found;
	}

	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page
	 */
	if (delalloc_start < *start)
		delalloc_start = *start;

	/*
	 * make sure to limit the number of pages we try to lock down
	 * if we're looping.
	 */
	if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
		delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;

	/* step two, lock all the pages after the page that has start */
	ret = lock_delalloc_pages(inode, locked_page,
				  delalloc_start, delalloc_end);
	if (ret == -EAGAIN) {
		/* some of the pages are gone, let's avoid looping by
		 * shortening the size of the delalloc range we're searching
		 */
		free_extent_state(cached_state);
		if (!loops) {
			unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
			max_bytes = PAGE_CACHE_SIZE - offset;
			loops = 1;
			goto again;
		} else {
			found = 0;
			goto out_failed;
		}
	}
	BUG_ON(ret);

	/* step three, lock the state bits for the whole range */
	lock_extent_bits(tree, delalloc_start, delalloc_end,
			 0, &cached_state, GFP_NOFS);

	/* then test to make sure it is all still delalloc */
	ret = test_range_bit(tree, delalloc_start, delalloc_end,
			     EXTENT_DELALLOC, 1, cached_state);
	if (!ret) {
		unlock_extent_cached(tree, delalloc_start, delalloc_end,
				     &cached_state, GFP_NOFS);
		__unlock_for_delalloc(inode, locked_page,
			      delalloc_start, delalloc_end);
		cond_resched();
		goto again;
	}
	free_extent_state(cached_state);
	*start = delalloc_start;
	*end = delalloc_end;
out_failed:
	return found;
}

int extent_clear_unlock_delalloc(struct inode *inode,
				struct extent_io_tree *tree,
				u64 start, u64 end, struct page *locked_page,
				unsigned long op)
{
	int ret;
	struct page *pages[16];
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	unsigned long nr_pages = end_index - index + 1;
	int i;
	int clear_bits = 0;

	if (op & EXTENT_CLEAR_UNLOCK)
		clear_bits |= EXTENT_LOCKED;
	if (op & EXTENT_CLEAR_DIRTY)
		clear_bits |= EXTENT_DIRTY;

	if (op & EXTENT_CLEAR_DELALLOC)
		clear_bits |= EXTENT_DELALLOC;

	clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
	if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
		    EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
		    EXTENT_SET_PRIVATE2)))
		return 0;

	while (nr_pages > 0) {
		ret = find_get_pages_contig(inode->i_mapping, index,
				     min_t(unsigned long,
				     nr_pages, ARRAY_SIZE(pages)), pages);
		for (i = 0; i < ret; i++) {

			if (op & EXTENT_SET_PRIVATE2)
				SetPagePrivate2(pages[i]);

			if (pages[i] == locked_page) {
1435 page_cache_release(pages[i]);
1436 continue;
1437 }
Chris Masona791e352009-10-08 11:27:10 -04001438 if (op & EXTENT_CLEAR_DIRTY)
Chris Masonc8b97812008-10-29 14:49:59 -04001439 clear_page_dirty_for_io(pages[i]);
Chris Masona791e352009-10-08 11:27:10 -04001440 if (op & EXTENT_SET_WRITEBACK)
Chris Masonc8b97812008-10-29 14:49:59 -04001441 set_page_writeback(pages[i]);
Chris Masona791e352009-10-08 11:27:10 -04001442 if (op & EXTENT_END_WRITEBACK)
Chris Masonc8b97812008-10-29 14:49:59 -04001443 end_page_writeback(pages[i]);
Chris Masona791e352009-10-08 11:27:10 -04001444 if (op & EXTENT_CLEAR_UNLOCK_PAGE)
Chris Mason771ed682008-11-06 22:02:51 -05001445 unlock_page(pages[i]);
Chris Masonc8b97812008-10-29 14:49:59 -04001446 page_cache_release(pages[i]);
1447 }
1448 nr_pages -= ret;
1449 index += ret;
1450 cond_resched();
1451 }
1452 return 0;
1453}
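/*
 * Illustrative flag combination (hypothetical, not copied from a specific
 * caller): an error path that wants to release everything could pass
 *
 *	op = EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK |
 *	     EXTENT_CLEAR_DELALLOC | EXTENT_CLEAR_DIRTY |
 *	     EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK;
 *
 * which clears the extent tree bits in one call and then walks the pages,
 * clearing dirty, starting and ending writeback, and unlocking each one
 * except locked_page.
 */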
Chris Masonc8b97812008-10-29 14:49:59 -04001454
Chris Masond352ac62008-09-29 15:18:18 -04001455/*
1456 * count the number of bytes in the tree that have a given bit(s)
1457 * set. This can be fairly slow, except for EXTENT_DIRTY which is
1458 * cached. The total number found is returned.
1459 */
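/*
 * Illustrative call (not from a specific caller): counting dirty bytes
 * from the start of the file,
 *
 *	u64 start = 0;
 *	bytes = count_range_bits(tree, &start, (u64)-1, (u64)-1,
 *				 EXTENT_DIRTY, 0);
 *
 * hits the cached tree->dirty_bytes shortcut below; any other combination
 * walks the tree state by state.  With contig == 1 the count stops at the
 * first gap or non-matching extent after the first match.
 */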
Chris Masond1310b22008-01-24 16:13:08 -05001460u64 count_range_bits(struct extent_io_tree *tree,
1461 u64 *start, u64 search_end, u64 max_bytes,
Chris Masonec29ed52011-02-23 16:23:20 -05001462 unsigned long bits, int contig)
Chris Masond1310b22008-01-24 16:13:08 -05001463{
1464 struct rb_node *node;
1465 struct extent_state *state;
1466 u64 cur_start = *start;
1467 u64 total_bytes = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05001468 u64 last = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001469 int found = 0;
1470
1471 if (search_end <= cur_start) {
Chris Masond1310b22008-01-24 16:13:08 -05001472 WARN_ON(1);
1473 return 0;
1474 }
1475
Chris Masoncad321a2008-12-17 14:51:42 -05001476 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001477 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1478 total_bytes = tree->dirty_bytes;
1479 goto out;
1480 }
1481 /*
1482 * this search will find all the extents that end after
1483 * our range starts.
1484 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001485 node = tree_search(tree, cur_start);
Chris Masond3977122009-01-05 21:25:51 -05001486 if (!node)
Chris Masond1310b22008-01-24 16:13:08 -05001487 goto out;
Chris Masond1310b22008-01-24 16:13:08 -05001488
Chris Masond3977122009-01-05 21:25:51 -05001489 while (1) {
Chris Masond1310b22008-01-24 16:13:08 -05001490 state = rb_entry(node, struct extent_state, rb_node);
1491 if (state->start > search_end)
1492 break;
Chris Masonec29ed52011-02-23 16:23:20 -05001493 if (contig && found && state->start > last + 1)
1494 break;
1495 if (state->end >= cur_start && (state->state & bits) == bits) {
Chris Masond1310b22008-01-24 16:13:08 -05001496 total_bytes += min(search_end, state->end) + 1 -
1497 max(cur_start, state->start);
1498 if (total_bytes >= max_bytes)
1499 break;
1500 if (!found) {
1501 *start = state->start;
1502 found = 1;
1503 }
Chris Masonec29ed52011-02-23 16:23:20 -05001504 last = state->end;
1505 } else if (contig && found) {
1506 break;
Chris Masond1310b22008-01-24 16:13:08 -05001507 }
1508 node = rb_next(node);
1509 if (!node)
1510 break;
1511 }
1512out:
Chris Masoncad321a2008-12-17 14:51:42 -05001513 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001514 return total_bytes;
1515}
Christoph Hellwigb2950862008-12-02 09:54:17 -05001516
Chris Masond352ac62008-09-29 15:18:18 -04001517/*
1518 * set the private field for a given byte offset in the tree. If there isn't
 1519 * an extent_state starting exactly at that offset, -ENOENT is returned.
1520 */
Chris Masond1310b22008-01-24 16:13:08 -05001521int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1522{
1523 struct rb_node *node;
1524 struct extent_state *state;
1525 int ret = 0;
1526
Chris Masoncad321a2008-12-17 14:51:42 -05001527 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001528 /*
1529 * this search will find all the extents that end after
1530 * our range starts.
1531 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001532 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04001533 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05001534 ret = -ENOENT;
1535 goto out;
1536 }
1537 state = rb_entry(node, struct extent_state, rb_node);
1538 if (state->start != start) {
1539 ret = -ENOENT;
1540 goto out;
1541 }
1542 state->private = private;
1543out:
Chris Masoncad321a2008-12-17 14:51:42 -05001544 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001545 return ret;
1546}
1547
1548int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1549{
1550 struct rb_node *node;
1551 struct extent_state *state;
1552 int ret = 0;
1553
Chris Masoncad321a2008-12-17 14:51:42 -05001554 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001555 /*
1556 * this search will find all the extents that end after
1557 * our range starts.
1558 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001559 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04001560 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05001561 ret = -ENOENT;
1562 goto out;
1563 }
1564 state = rb_entry(node, struct extent_state, rb_node);
1565 if (state->start != start) {
1566 ret = -ENOENT;
1567 goto out;
1568 }
1569 *private = state->private;
1570out:
Chris Masoncad321a2008-12-17 14:51:42 -05001571 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001572 return ret;
1573}
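/*
 * set_state_private and get_state_private form a pair; an illustrative
 * sequence (not from a specific caller) would be
 *
 *	ret = set_state_private(tree, start, value);
 *	...
 *	ret = get_state_private(tree, start, &value);
 *
 * both return -ENOENT unless an extent_state begins exactly at 'start',
 * so they only work on ranges the caller has already carved out.
 */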
1574
1575/*
1576 * searches a range in the state tree for a given mask.
Chris Mason70dec802008-01-29 09:59:12 -05001577 * If 'filled' == 1, this returns 1 only if every extent in the range
Chris Masond1310b22008-01-24 16:13:08 -05001578 * has the bits set. Otherwise, 1 is returned if any bit in the
1579 * range is found set.
1580 */
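/*
 * For example, the page helpers below use both modes:
 * test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL) succeeds only
 * if the whole page range is uptodate, while
 * test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL) reports whether
 * any part of the page is still locked.
 */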
1581int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
Chris Mason9655d292009-09-02 15:22:30 -04001582 int bits, int filled, struct extent_state *cached)
Chris Masond1310b22008-01-24 16:13:08 -05001583{
1584 struct extent_state *state = NULL;
1585 struct rb_node *node;
1586 int bitset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001587
Chris Masoncad321a2008-12-17 14:51:42 -05001588 spin_lock(&tree->lock);
Chris Mason9655d292009-09-02 15:22:30 -04001589 if (cached && cached->tree && cached->start == start)
1590 node = &cached->rb_node;
1591 else
1592 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -05001593 while (node && start <= end) {
1594 state = rb_entry(node, struct extent_state, rb_node);
1595
1596 if (filled && state->start > start) {
1597 bitset = 0;
1598 break;
1599 }
1600
1601 if (state->start > end)
1602 break;
1603
1604 if (state->state & bits) {
1605 bitset = 1;
1606 if (!filled)
1607 break;
1608 } else if (filled) {
1609 bitset = 0;
1610 break;
1611 }
Chris Mason46562cec2009-09-23 20:23:16 -04001612
1613 if (state->end == (u64)-1)
1614 break;
1615
Chris Masond1310b22008-01-24 16:13:08 -05001616 start = state->end + 1;
1617 if (start > end)
1618 break;
1619 node = rb_next(node);
1620 if (!node) {
1621 if (filled)
1622 bitset = 0;
1623 break;
1624 }
1625 }
Chris Masoncad321a2008-12-17 14:51:42 -05001626 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001627 return bitset;
1628}
Chris Masond1310b22008-01-24 16:13:08 -05001629
1630/*
1631 * helper function to set a given page up to date if all the
1632 * extents in the tree for that page are up to date
1633 */
1634static int check_page_uptodate(struct extent_io_tree *tree,
1635 struct page *page)
1636{
1637 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1638 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason9655d292009-09-02 15:22:30 -04001639 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
Chris Masond1310b22008-01-24 16:13:08 -05001640 SetPageUptodate(page);
1641 return 0;
1642}
1643
1644/*
1645 * helper function to unlock a page if all the extents in the tree
1646 * for that page are unlocked
1647 */
1648static int check_page_locked(struct extent_io_tree *tree,
1649 struct page *page)
1650{
1651 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1652 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason9655d292009-09-02 15:22:30 -04001653 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
Chris Masond1310b22008-01-24 16:13:08 -05001654 unlock_page(page);
1655 return 0;
1656}
1657
1658/*
 1659 * helper function to end page writeback for bios that cover only part of
 1660 * a page; right now it simply ends writeback on the page
1661 */
1662static int check_page_writeback(struct extent_io_tree *tree,
1663 struct page *page)
1664{
Chris Mason1edbb732009-09-02 13:24:36 -04001665 end_page_writeback(page);
Chris Masond1310b22008-01-24 16:13:08 -05001666 return 0;
1667}
1668
1669/* lots and lots of room for performance fixes in the end_bio funcs */
1670
1671/*
1672 * after a writepage IO is done, we need to:
1673 * clear the uptodate bits on error
1674 * clear the writeback bits in the extent tree for this IO
1675 * end_page_writeback if the page has no more pending IO
1676 *
1677 * Scheduling is not allowed, so the extent state tree is expected
1678 * to have one and only one object corresponding to this IO.
1679 */
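/*
 * The bio_vec array is walked backwards here (and in the prepare_write
 * completion below); a bvec covering the whole page ends writeback
 * directly, partial-page bvecs go through check_page_writeback().
 */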
Chris Masond1310b22008-01-24 16:13:08 -05001680static void end_bio_extent_writepage(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05001681{
Chris Mason1259ab72008-05-12 13:39:03 -04001682 int uptodate = err == 0;
Chris Masond1310b22008-01-24 16:13:08 -05001683 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001684 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001685 u64 start;
1686 u64 end;
1687 int whole_page;
Chris Mason1259ab72008-05-12 13:39:03 -04001688 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05001689
Chris Masond1310b22008-01-24 16:13:08 -05001690 do {
1691 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04001692 tree = &BTRFS_I(page->mapping->host)->io_tree;
1693
Chris Masond1310b22008-01-24 16:13:08 -05001694 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1695 bvec->bv_offset;
1696 end = start + bvec->bv_len - 1;
1697
1698 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1699 whole_page = 1;
1700 else
1701 whole_page = 0;
1702
1703 if (--bvec >= bio->bi_io_vec)
1704 prefetchw(&bvec->bv_page->flags);
Chris Mason1259ab72008-05-12 13:39:03 -04001705 if (tree->ops && tree->ops->writepage_end_io_hook) {
1706 ret = tree->ops->writepage_end_io_hook(page, start,
David Woodhouse902b22f2008-08-20 08:51:49 -04001707 end, NULL, uptodate);
Chris Mason1259ab72008-05-12 13:39:03 -04001708 if (ret)
1709 uptodate = 0;
1710 }
1711
1712 if (!uptodate && tree->ops &&
1713 tree->ops->writepage_io_failed_hook) {
1714 ret = tree->ops->writepage_io_failed_hook(bio, page,
David Woodhouse902b22f2008-08-20 08:51:49 -04001715 start, end, NULL);
Chris Mason1259ab72008-05-12 13:39:03 -04001716 if (ret == 0) {
Chris Mason1259ab72008-05-12 13:39:03 -04001717 uptodate = (err == 0);
1718 continue;
1719 }
1720 }
1721
Chris Masond1310b22008-01-24 16:13:08 -05001722 if (!uptodate) {
Josef Bacik2ac55d42010-02-03 19:33:23 +00001723 clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05001724 ClearPageUptodate(page);
1725 SetPageError(page);
1726 }
Chris Mason70dec802008-01-29 09:59:12 -05001727
Chris Masond1310b22008-01-24 16:13:08 -05001728 if (whole_page)
1729 end_page_writeback(page);
1730 else
1731 check_page_writeback(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05001732 } while (bvec >= bio->bi_io_vec);
Chris Mason2b1f55b2008-09-24 11:48:04 -04001733
Chris Masond1310b22008-01-24 16:13:08 -05001734 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05001735}
1736
1737/*
1738 * after a readpage IO is done, we need to:
1739 * clear the uptodate bits on error
1740 * set the uptodate bits if things worked
1741 * set the page up to date if all extents in the tree are uptodate
1742 * clear the lock bit in the extent tree
1743 * unlock the page if there are no other extents locked for it
1744 *
1745 * Scheduling is not allowed, so the extent state tree is expected
1746 * to have one and only one object corresponding to this IO.
1747 */
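/*
 * Before running the end_io hooks, the code below looks up the
 * EXTENT_LOCKED state beginning at the start of each bvec and takes a
 * reference on it with cache_state(), so set_extent_uptodate() and
 * unlock_extent_cached() can reuse it instead of searching the tree again.
 */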
Chris Masond1310b22008-01-24 16:13:08 -05001748static void end_bio_extent_readpage(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05001749{
1750 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Mason4125bf72010-02-03 18:18:45 +00001751 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
1752 struct bio_vec *bvec = bio->bi_io_vec;
David Woodhouse902b22f2008-08-20 08:51:49 -04001753 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001754 u64 start;
1755 u64 end;
1756 int whole_page;
1757 int ret;
1758
Chris Masond20f7042008-12-08 16:58:54 -05001759 if (err)
1760 uptodate = 0;
1761
Chris Masond1310b22008-01-24 16:13:08 -05001762 do {
1763 struct page *page = bvec->bv_page;
Arne Jansen507903b2011-04-06 10:02:20 +00001764 struct extent_state *cached = NULL;
1765 struct extent_state *state;
1766
David Woodhouse902b22f2008-08-20 08:51:49 -04001767 tree = &BTRFS_I(page->mapping->host)->io_tree;
1768
Chris Masond1310b22008-01-24 16:13:08 -05001769 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1770 bvec->bv_offset;
1771 end = start + bvec->bv_len - 1;
1772
1773 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1774 whole_page = 1;
1775 else
1776 whole_page = 0;
1777
Chris Mason4125bf72010-02-03 18:18:45 +00001778 if (++bvec <= bvec_end)
Chris Masond1310b22008-01-24 16:13:08 -05001779 prefetchw(&bvec->bv_page->flags);
1780
Arne Jansen507903b2011-04-06 10:02:20 +00001781 spin_lock(&tree->lock);
Chris Mason0d399202011-04-16 06:55:39 -04001782 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
Chris Mason109b36a2011-04-12 13:57:39 -04001783 if (state && state->start == start) {
Arne Jansen507903b2011-04-06 10:02:20 +00001784 /*
1785 * take a reference on the state, unlock will drop
1786 * the ref
1787 */
1788 cache_state(state, &cached);
1789 }
1790 spin_unlock(&tree->lock);
1791
Chris Masond1310b22008-01-24 16:13:08 -05001792 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
Chris Mason70dec802008-01-29 09:59:12 -05001793 ret = tree->ops->readpage_end_io_hook(page, start, end,
Arne Jansen507903b2011-04-06 10:02:20 +00001794 state);
Chris Masond1310b22008-01-24 16:13:08 -05001795 if (ret)
1796 uptodate = 0;
1797 }
Chris Mason7e383262008-04-09 16:28:12 -04001798 if (!uptodate && tree->ops &&
1799 tree->ops->readpage_io_failed_hook) {
1800 ret = tree->ops->readpage_io_failed_hook(bio, page,
David Woodhouse902b22f2008-08-20 08:51:49 -04001801 start, end, NULL);
Chris Mason7e383262008-04-09 16:28:12 -04001802 if (ret == 0) {
Chris Mason3b951512008-04-17 11:29:12 -04001803 uptodate =
1804 test_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Masond20f7042008-12-08 16:58:54 -05001805 if (err)
1806 uptodate = 0;
Arne Jansen507903b2011-04-06 10:02:20 +00001807 uncache_state(&cached);
Chris Mason7e383262008-04-09 16:28:12 -04001808 continue;
1809 }
1810 }
Chris Mason70dec802008-01-29 09:59:12 -05001811
Chris Mason771ed682008-11-06 22:02:51 -05001812 if (uptodate) {
Arne Jansen507903b2011-04-06 10:02:20 +00001813 set_extent_uptodate(tree, start, end, &cached,
David Woodhouse902b22f2008-08-20 08:51:49 -04001814 GFP_ATOMIC);
Chris Mason771ed682008-11-06 22:02:51 -05001815 }
Arne Jansen507903b2011-04-06 10:02:20 +00001816 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05001817
Chris Mason70dec802008-01-29 09:59:12 -05001818 if (whole_page) {
1819 if (uptodate) {
1820 SetPageUptodate(page);
1821 } else {
1822 ClearPageUptodate(page);
1823 SetPageError(page);
1824 }
Chris Masond1310b22008-01-24 16:13:08 -05001825 unlock_page(page);
Chris Mason70dec802008-01-29 09:59:12 -05001826 } else {
1827 if (uptodate) {
1828 check_page_uptodate(tree, page);
1829 } else {
1830 ClearPageUptodate(page);
1831 SetPageError(page);
1832 }
Chris Masond1310b22008-01-24 16:13:08 -05001833 check_page_locked(tree, page);
Chris Mason70dec802008-01-29 09:59:12 -05001834 }
Chris Mason4125bf72010-02-03 18:18:45 +00001835 } while (bvec <= bvec_end);
Chris Masond1310b22008-01-24 16:13:08 -05001836
1837 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05001838}
1839
1840/*
1841 * IO done from prepare_write is pretty simple, we just unlock
1842 * the structs in the extent tree when done, and set the uptodate bits
1843 * as appropriate.
1844 */
Chris Masond1310b22008-01-24 16:13:08 -05001845static void end_bio_extent_preparewrite(struct bio *bio, int err)
Chris Masond1310b22008-01-24 16:13:08 -05001846{
1847 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1848 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001849 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001850 u64 start;
1851 u64 end;
1852
Chris Masond1310b22008-01-24 16:13:08 -05001853 do {
1854 struct page *page = bvec->bv_page;
Arne Jansen507903b2011-04-06 10:02:20 +00001855 struct extent_state *cached = NULL;
David Woodhouse902b22f2008-08-20 08:51:49 -04001856 tree = &BTRFS_I(page->mapping->host)->io_tree;
1857
Chris Masond1310b22008-01-24 16:13:08 -05001858 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1859 bvec->bv_offset;
1860 end = start + bvec->bv_len - 1;
1861
1862 if (--bvec >= bio->bi_io_vec)
1863 prefetchw(&bvec->bv_page->flags);
1864
1865 if (uptodate) {
Arne Jansen507903b2011-04-06 10:02:20 +00001866 set_extent_uptodate(tree, start, end, &cached,
1867 GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05001868 } else {
1869 ClearPageUptodate(page);
1870 SetPageError(page);
1871 }
1872
Arne Jansen507903b2011-04-06 10:02:20 +00001873 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05001874
1875 } while (bvec >= bio->bi_io_vec);
1876
1877 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05001878}
1879
Miao Xie88f794e2010-11-22 03:02:55 +00001880struct bio *
1881btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1882 gfp_t gfp_flags)
Chris Masond1310b22008-01-24 16:13:08 -05001883{
1884 struct bio *bio;
1885
1886 bio = bio_alloc(gfp_flags, nr_vecs);
1887
1888 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1889 while (!bio && (nr_vecs /= 2))
1890 bio = bio_alloc(gfp_flags, nr_vecs);
1891 }
1892
1893 if (bio) {
Chris Masone1c4b742008-04-22 13:26:46 -04001894 bio->bi_size = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001895 bio->bi_bdev = bdev;
1896 bio->bi_sector = first_sector;
1897 }
1898 return bio;
1899}
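/*
 * Illustrative call, mirroring submit_extent_page() below:
 *
 *	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
 *	if (!bio)
 *		return -ENOMEM;
 *
 * if the allocation fails while the task is already in memory reclaim
 * (PF_MEMALLOC), the vector count is halved repeatedly before giving up.
 */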
1900
Chris Masonc8b97812008-10-29 14:49:59 -04001901static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1902 unsigned long bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05001903{
Chris Masond1310b22008-01-24 16:13:08 -05001904 int ret = 0;
Chris Mason70dec802008-01-29 09:59:12 -05001905 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1906 struct page *page = bvec->bv_page;
1907 struct extent_io_tree *tree = bio->bi_private;
Chris Mason70dec802008-01-29 09:59:12 -05001908 u64 start;
Chris Mason70dec802008-01-29 09:59:12 -05001909
1910 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
Chris Mason70dec802008-01-29 09:59:12 -05001911
David Woodhouse902b22f2008-08-20 08:51:49 -04001912 bio->bi_private = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05001913
1914 bio_get(bio);
1915
Chris Mason065631f2008-02-20 12:07:25 -05001916 if (tree->ops && tree->ops->submit_bio_hook)
liubo6b82ce82011-01-26 06:21:39 +00001917 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
Chris Masoneaf25d92010-05-25 09:48:28 -04001918 mirror_num, bio_flags, start);
Chris Mason0b86a832008-03-24 15:01:56 -04001919 else
1920 submit_bio(rw, bio);
Chris Masond1310b22008-01-24 16:13:08 -05001921 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1922 ret = -EOPNOTSUPP;
1923 bio_put(bio);
1924 return ret;
1925}
1926
1927static int submit_extent_page(int rw, struct extent_io_tree *tree,
1928 struct page *page, sector_t sector,
1929 size_t size, unsigned long offset,
1930 struct block_device *bdev,
1931 struct bio **bio_ret,
1932 unsigned long max_pages,
Chris Masonf1885912008-04-09 16:28:12 -04001933 bio_end_io_t end_io_func,
Chris Masonc8b97812008-10-29 14:49:59 -04001934 int mirror_num,
1935 unsigned long prev_bio_flags,
1936 unsigned long bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05001937{
1938 int ret = 0;
1939 struct bio *bio;
1940 int nr;
Chris Masonc8b97812008-10-29 14:49:59 -04001941 int contig = 0;
1942 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
1943 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
Chris Mason5b050f02008-11-11 09:34:41 -05001944 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
Chris Masond1310b22008-01-24 16:13:08 -05001945
1946 if (bio_ret && *bio_ret) {
1947 bio = *bio_ret;
Chris Masonc8b97812008-10-29 14:49:59 -04001948 if (old_compressed)
1949 contig = bio->bi_sector == sector;
1950 else
1951 contig = bio->bi_sector + (bio->bi_size >> 9) ==
1952 sector;
1953
1954 if (prev_bio_flags != bio_flags || !contig ||
Chris Mason239b14b2008-03-24 15:02:07 -04001955 (tree->ops && tree->ops->merge_bio_hook &&
Chris Masonc8b97812008-10-29 14:49:59 -04001956 tree->ops->merge_bio_hook(page, offset, page_size, bio,
1957 bio_flags)) ||
1958 bio_add_page(bio, page, page_size, offset) < page_size) {
1959 ret = submit_one_bio(rw, bio, mirror_num,
1960 prev_bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05001961 bio = NULL;
1962 } else {
1963 return 0;
1964 }
1965 }
Chris Masonc8b97812008-10-29 14:49:59 -04001966 if (this_compressed)
1967 nr = BIO_MAX_PAGES;
1968 else
1969 nr = bio_get_nr_vecs(bdev);
1970
Miao Xie88f794e2010-11-22 03:02:55 +00001971 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
Tsutomu Itoh5df67082011-02-01 09:17:35 +00001972 if (!bio)
1973 return -ENOMEM;
Chris Mason70dec802008-01-29 09:59:12 -05001974
Chris Masonc8b97812008-10-29 14:49:59 -04001975 bio_add_page(bio, page, page_size, offset);
Chris Masond1310b22008-01-24 16:13:08 -05001976 bio->bi_end_io = end_io_func;
1977 bio->bi_private = tree;
Chris Mason70dec802008-01-29 09:59:12 -05001978
Chris Masond3977122009-01-05 21:25:51 -05001979 if (bio_ret)
Chris Masond1310b22008-01-24 16:13:08 -05001980 *bio_ret = bio;
Chris Masond3977122009-01-05 21:25:51 -05001981 else
Chris Masonc8b97812008-10-29 14:49:59 -04001982 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05001983
1984 return ret;
1985}
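/*
 * Summary of the merge logic above: if a bio is already being built, has
 * the same bio_flags, is contiguous on disk, passes the optional
 * merge_bio_hook, and the page fits via bio_add_page(), nothing is
 * submitted and 0 is returned.  Otherwise the old bio is submitted and a
 * fresh one is allocated for this page; it is either handed back through
 * *bio_ret or submitted immediately when no bio pointer was supplied.
 */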
1986
1987void set_page_extent_mapped(struct page *page)
1988{
1989 if (!PagePrivate(page)) {
1990 SetPagePrivate(page);
Chris Masond1310b22008-01-24 16:13:08 -05001991 page_cache_get(page);
Chris Mason6af118ce2008-07-22 11:18:07 -04001992 set_page_private(page, EXTENT_PAGE_PRIVATE);
Chris Masond1310b22008-01-24 16:13:08 -05001993 }
1994}
1995
Christoph Hellwigb2950862008-12-02 09:54:17 -05001996static void set_page_extent_head(struct page *page, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05001997{
Chris Masoneb14ab82011-02-10 12:35:00 -05001998 WARN_ON(!PagePrivate(page));
Chris Masond1310b22008-01-24 16:13:08 -05001999 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
2000}
2001
2002/*
2003 * basic readpage implementation. Locked extent state structs are inserted
 2004 * into the tree; they are removed when the IO is done (by the end_io
2005 * handlers)
2006 */
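/*
 * For each block-aligned chunk of the page, the loop below either zeroes
 * it (past EOF or a hole), skips it (already EXTENT_UPTODATE), flags an
 * error (inline extent that never got marked uptodate), or submits a read
 * through submit_extent_page(), merging with the caller's in-flight bio
 * when possible.
 */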
2007static int __extent_read_full_page(struct extent_io_tree *tree,
2008 struct page *page,
2009 get_extent_t *get_extent,
Chris Masonc8b97812008-10-29 14:49:59 -04002010 struct bio **bio, int mirror_num,
2011 unsigned long *bio_flags)
Chris Masond1310b22008-01-24 16:13:08 -05002012{
2013 struct inode *inode = page->mapping->host;
2014 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2015 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2016 u64 end;
2017 u64 cur = start;
2018 u64 extent_offset;
2019 u64 last_byte = i_size_read(inode);
2020 u64 block_start;
2021 u64 cur_end;
2022 sector_t sector;
2023 struct extent_map *em;
2024 struct block_device *bdev;
Josef Bacik11c65dc2010-05-23 11:07:21 -04002025 struct btrfs_ordered_extent *ordered;
Chris Masond1310b22008-01-24 16:13:08 -05002026 int ret;
2027 int nr = 0;
2028 size_t page_offset = 0;
2029 size_t iosize;
Chris Masonc8b97812008-10-29 14:49:59 -04002030 size_t disk_io_size;
Chris Masond1310b22008-01-24 16:13:08 -05002031 size_t blocksize = inode->i_sb->s_blocksize;
Chris Masonc8b97812008-10-29 14:49:59 -04002032 unsigned long this_bio_flag = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002033
2034 set_page_extent_mapped(page);
2035
2036 end = page_end;
Josef Bacik11c65dc2010-05-23 11:07:21 -04002037 while (1) {
2038 lock_extent(tree, start, end, GFP_NOFS);
2039 ordered = btrfs_lookup_ordered_extent(inode, start);
2040 if (!ordered)
2041 break;
2042 unlock_extent(tree, start, end, GFP_NOFS);
2043 btrfs_start_ordered_extent(inode, ordered, 1);
2044 btrfs_put_ordered_extent(ordered);
2045 }
Chris Masond1310b22008-01-24 16:13:08 -05002046
Chris Masonc8b97812008-10-29 14:49:59 -04002047 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2048 char *userpage;
2049 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
2050
2051 if (zero_offset) {
2052 iosize = PAGE_CACHE_SIZE - zero_offset;
2053 userpage = kmap_atomic(page, KM_USER0);
2054 memset(userpage + zero_offset, 0, iosize);
2055 flush_dcache_page(page);
2056 kunmap_atomic(userpage, KM_USER0);
2057 }
2058 }
Chris Masond1310b22008-01-24 16:13:08 -05002059 while (cur <= end) {
2060 if (cur >= last_byte) {
2061 char *userpage;
Arne Jansen507903b2011-04-06 10:02:20 +00002062 struct extent_state *cached = NULL;
2063
Chris Masond1310b22008-01-24 16:13:08 -05002064 iosize = PAGE_CACHE_SIZE - page_offset;
2065 userpage = kmap_atomic(page, KM_USER0);
2066 memset(userpage + page_offset, 0, iosize);
2067 flush_dcache_page(page);
2068 kunmap_atomic(userpage, KM_USER0);
2069 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00002070 &cached, GFP_NOFS);
2071 unlock_extent_cached(tree, cur, cur + iosize - 1,
2072 &cached, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002073 break;
2074 }
2075 em = get_extent(inode, page, page_offset, cur,
2076 end - cur + 1, 0);
2077 if (IS_ERR(em) || !em) {
2078 SetPageError(page);
2079 unlock_extent(tree, cur, end, GFP_NOFS);
2080 break;
2081 }
Chris Masond1310b22008-01-24 16:13:08 -05002082 extent_offset = cur - em->start;
2083 BUG_ON(extent_map_end(em) <= cur);
2084 BUG_ON(end < cur);
2085
Li Zefan261507a02010-12-17 14:21:50 +08002086 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
Chris Masonc8b97812008-10-29 14:49:59 -04002087 this_bio_flag = EXTENT_BIO_COMPRESSED;
Li Zefan261507a02010-12-17 14:21:50 +08002088 extent_set_compress_type(&this_bio_flag,
2089 em->compress_type);
2090 }
Chris Masonc8b97812008-10-29 14:49:59 -04002091
Chris Masond1310b22008-01-24 16:13:08 -05002092 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2093 cur_end = min(extent_map_end(em) - 1, end);
2094 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
Chris Masonc8b97812008-10-29 14:49:59 -04002095 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2096 disk_io_size = em->block_len;
2097 sector = em->block_start >> 9;
2098 } else {
2099 sector = (em->block_start + extent_offset) >> 9;
2100 disk_io_size = iosize;
2101 }
Chris Masond1310b22008-01-24 16:13:08 -05002102 bdev = em->bdev;
2103 block_start = em->block_start;
Yan Zhengd899e052008-10-30 14:25:28 -04002104 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2105 block_start = EXTENT_MAP_HOLE;
Chris Masond1310b22008-01-24 16:13:08 -05002106 free_extent_map(em);
2107 em = NULL;
2108
2109 /* we've found a hole, just zero and go on */
2110 if (block_start == EXTENT_MAP_HOLE) {
2111 char *userpage;
Arne Jansen507903b2011-04-06 10:02:20 +00002112 struct extent_state *cached = NULL;
2113
Chris Masond1310b22008-01-24 16:13:08 -05002114 userpage = kmap_atomic(page, KM_USER0);
2115 memset(userpage + page_offset, 0, iosize);
2116 flush_dcache_page(page);
2117 kunmap_atomic(userpage, KM_USER0);
2118
2119 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00002120 &cached, GFP_NOFS);
2121 unlock_extent_cached(tree, cur, cur + iosize - 1,
2122 &cached, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002123 cur = cur + iosize;
2124 page_offset += iosize;
2125 continue;
2126 }
 2127		/* the get_extent function already copied the data into the page */
Chris Mason9655d292009-09-02 15:22:30 -04002128 if (test_range_bit(tree, cur, cur_end,
2129 EXTENT_UPTODATE, 1, NULL)) {
Chris Masona1b32a52008-09-05 16:09:51 -04002130 check_page_uptodate(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05002131 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2132 cur = cur + iosize;
2133 page_offset += iosize;
2134 continue;
2135 }
Chris Mason70dec802008-01-29 09:59:12 -05002136 /* we have an inline extent but it didn't get marked up
2137 * to date. Error out
2138 */
2139 if (block_start == EXTENT_MAP_INLINE) {
2140 SetPageError(page);
2141 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
2142 cur = cur + iosize;
2143 page_offset += iosize;
2144 continue;
2145 }
Chris Masond1310b22008-01-24 16:13:08 -05002146
2147 ret = 0;
2148 if (tree->ops && tree->ops->readpage_io_hook) {
2149 ret = tree->ops->readpage_io_hook(page, cur,
2150 cur + iosize - 1);
2151 }
2152 if (!ret) {
Chris Mason89642222008-07-24 09:41:53 -04002153 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2154 pnr -= page->index;
Chris Masond1310b22008-01-24 16:13:08 -05002155 ret = submit_extent_page(READ, tree, page,
Chris Masonc8b97812008-10-29 14:49:59 -04002156 sector, disk_io_size, page_offset,
Chris Mason89642222008-07-24 09:41:53 -04002157 bdev, bio, pnr,
Chris Masonc8b97812008-10-29 14:49:59 -04002158 end_bio_extent_readpage, mirror_num,
2159 *bio_flags,
2160 this_bio_flag);
Chris Mason89642222008-07-24 09:41:53 -04002161 nr++;
Chris Masonc8b97812008-10-29 14:49:59 -04002162 *bio_flags = this_bio_flag;
Chris Masond1310b22008-01-24 16:13:08 -05002163 }
2164 if (ret)
2165 SetPageError(page);
2166 cur = cur + iosize;
2167 page_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002168 }
2169 if (!nr) {
2170 if (!PageError(page))
2171 SetPageUptodate(page);
2172 unlock_page(page);
2173 }
2174 return 0;
2175}
2176
2177int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2178 get_extent_t *get_extent)
2179{
2180 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04002181 unsigned long bio_flags = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002182 int ret;
2183
Chris Masonc8b97812008-10-29 14:49:59 -04002184 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2185 &bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002186 if (bio)
liubo6b82ce82011-01-26 06:21:39 +00002187 ret = submit_one_bio(READ, bio, 0, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002188 return ret;
2189}
Chris Masond1310b22008-01-24 16:13:08 -05002190
Chris Mason11c83492009-04-20 15:50:09 -04002191static noinline void update_nr_written(struct page *page,
2192 struct writeback_control *wbc,
2193 unsigned long nr_written)
2194{
2195 wbc->nr_to_write -= nr_written;
2196 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2197 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2198 page->mapping->writeback_index = page->index + nr_written;
2199}
2200
Chris Masond1310b22008-01-24 16:13:08 -05002201/*
2202 * the writepage semantics are similar to regular writepage. extent
2203 * records are inserted to lock ranges in the tree, and as dirty areas
2204 * are found, they are marked writeback. Then the lock bits are removed
2205 * and the end_io handler clears the writeback ranges
2206 */
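/*
 * Two phases happen below: first, unless the caller already holds the
 * range locked (epd->extent_locked), delalloc regions covering the page
 * are found with find_lock_delalloc_range() and handed to
 * tree->ops->fill_delalloc(); then the dirty extents backing the page are
 * mapped with epd->get_extent and written out block by block through
 * submit_extent_page().
 */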
2207static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2208 void *data)
2209{
2210 struct inode *inode = page->mapping->host;
2211 struct extent_page_data *epd = data;
2212 struct extent_io_tree *tree = epd->tree;
2213 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2214 u64 delalloc_start;
2215 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2216 u64 end;
2217 u64 cur = start;
2218 u64 extent_offset;
2219 u64 last_byte = i_size_read(inode);
2220 u64 block_start;
2221 u64 iosize;
2222 sector_t sector;
Chris Mason2c64c532009-09-02 15:04:12 -04002223 struct extent_state *cached_state = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002224 struct extent_map *em;
2225 struct block_device *bdev;
2226 int ret;
2227 int nr = 0;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002228 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002229 size_t blocksize;
2230 loff_t i_size = i_size_read(inode);
2231 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2232 u64 nr_delalloc;
2233 u64 delalloc_end;
Chris Masonc8b97812008-10-29 14:49:59 -04002234 int page_started;
2235 int compressed;
Chris Masonffbd5172009-04-20 15:50:09 -04002236 int write_flags;
Chris Mason771ed682008-11-06 22:02:51 -05002237 unsigned long nr_written = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002238
Chris Masonffbd5172009-04-20 15:50:09 -04002239 if (wbc->sync_mode == WB_SYNC_ALL)
Jens Axboe721a9602011-03-09 11:56:30 +01002240 write_flags = WRITE_SYNC;
Chris Masonffbd5172009-04-20 15:50:09 -04002241 else
2242 write_flags = WRITE;
2243
liubo1abe9b82011-03-24 11:18:59 +00002244 trace___extent_writepage(page, inode, wbc);
2245
Chris Masond1310b22008-01-24 16:13:08 -05002246 WARN_ON(!PageLocked(page));
Chris Mason7f3c74f2008-07-18 12:01:11 -04002247 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
Chris Mason211c17f2008-05-15 09:13:45 -04002248 if (page->index > end_index ||
Chris Mason7f3c74f2008-07-18 12:01:11 -04002249 (page->index == end_index && !pg_offset)) {
Chris Mason39be25c2008-11-10 11:50:50 -05002250 page->mapping->a_ops->invalidatepage(page, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002251 unlock_page(page);
2252 return 0;
2253 }
2254
2255 if (page->index == end_index) {
2256 char *userpage;
2257
Chris Masond1310b22008-01-24 16:13:08 -05002258 userpage = kmap_atomic(page, KM_USER0);
Chris Mason7f3c74f2008-07-18 12:01:11 -04002259 memset(userpage + pg_offset, 0,
2260 PAGE_CACHE_SIZE - pg_offset);
Chris Masond1310b22008-01-24 16:13:08 -05002261 kunmap_atomic(userpage, KM_USER0);
Chris Mason211c17f2008-05-15 09:13:45 -04002262 flush_dcache_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002263 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002264 pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002265
2266 set_page_extent_mapped(page);
2267
2268 delalloc_start = start;
2269 delalloc_end = 0;
Chris Masonc8b97812008-10-29 14:49:59 -04002270 page_started = 0;
Chris Mason771ed682008-11-06 22:02:51 -05002271 if (!epd->extent_locked) {
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002272 u64 delalloc_to_write = 0;
Chris Mason11c83492009-04-20 15:50:09 -04002273 /*
2274 * make sure the wbc mapping index is at least updated
2275 * to this page.
2276 */
2277 update_nr_written(page, wbc, 0);
2278
Chris Masond3977122009-01-05 21:25:51 -05002279 while (delalloc_end < page_end) {
Chris Mason771ed682008-11-06 22:02:51 -05002280 nr_delalloc = find_lock_delalloc_range(inode, tree,
Chris Masonc8b97812008-10-29 14:49:59 -04002281 page,
2282 &delalloc_start,
Chris Masond1310b22008-01-24 16:13:08 -05002283 &delalloc_end,
2284 128 * 1024 * 1024);
Chris Mason771ed682008-11-06 22:02:51 -05002285 if (nr_delalloc == 0) {
2286 delalloc_start = delalloc_end + 1;
2287 continue;
2288 }
2289 tree->ops->fill_delalloc(inode, page, delalloc_start,
2290 delalloc_end, &page_started,
2291 &nr_written);
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002292 /*
2293 * delalloc_end is already one less than the total
2294 * length, so we don't subtract one from
2295 * PAGE_CACHE_SIZE
2296 */
2297 delalloc_to_write += (delalloc_end - delalloc_start +
2298 PAGE_CACHE_SIZE) >>
2299 PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05002300 delalloc_start = delalloc_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05002301 }
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002302 if (wbc->nr_to_write < delalloc_to_write) {
2303 int thresh = 8192;
2304
2305 if (delalloc_to_write < thresh * 2)
2306 thresh = delalloc_to_write;
2307 wbc->nr_to_write = min_t(u64, delalloc_to_write,
2308 thresh);
2309 }
Chris Masonc8b97812008-10-29 14:49:59 -04002310
Chris Mason771ed682008-11-06 22:02:51 -05002311 /* did the fill delalloc function already unlock and start
2312 * the IO?
2313 */
2314 if (page_started) {
2315 ret = 0;
Chris Mason11c83492009-04-20 15:50:09 -04002316 /*
2317 * we've unlocked the page, so we can't update
2318 * the mapping's writeback index, just update
2319 * nr_to_write.
2320 */
2321 wbc->nr_to_write -= nr_written;
2322 goto done_unlocked;
Chris Mason771ed682008-11-06 22:02:51 -05002323 }
Chris Masonc8b97812008-10-29 14:49:59 -04002324 }
Chris Mason247e7432008-07-17 12:53:51 -04002325 if (tree->ops && tree->ops->writepage_start_hook) {
Chris Masonc8b97812008-10-29 14:49:59 -04002326 ret = tree->ops->writepage_start_hook(page, start,
2327 page_end);
Chris Mason247e7432008-07-17 12:53:51 -04002328 if (ret == -EAGAIN) {
Chris Mason247e7432008-07-17 12:53:51 -04002329 redirty_page_for_writepage(wbc, page);
Chris Mason11c83492009-04-20 15:50:09 -04002330 update_nr_written(page, wbc, nr_written);
Chris Mason247e7432008-07-17 12:53:51 -04002331 unlock_page(page);
Chris Mason771ed682008-11-06 22:02:51 -05002332 ret = 0;
Chris Mason11c83492009-04-20 15:50:09 -04002333 goto done_unlocked;
Chris Mason247e7432008-07-17 12:53:51 -04002334 }
2335 }
2336
Chris Mason11c83492009-04-20 15:50:09 -04002337 /*
2338 * we don't want to touch the inode after unlocking the page,
2339 * so we update the mapping writeback index now
2340 */
2341 update_nr_written(page, wbc, nr_written + 1);
Chris Mason771ed682008-11-06 22:02:51 -05002342
Chris Masond1310b22008-01-24 16:13:08 -05002343 end = page_end;
Chris Masond1310b22008-01-24 16:13:08 -05002344 if (last_byte <= start) {
Chris Masone6dcd2d2008-07-17 12:53:50 -04002345 if (tree->ops && tree->ops->writepage_end_io_hook)
2346 tree->ops->writepage_end_io_hook(page, start,
2347 page_end, NULL, 1);
Chris Masond1310b22008-01-24 16:13:08 -05002348 goto done;
2349 }
2350
Chris Masond1310b22008-01-24 16:13:08 -05002351 blocksize = inode->i_sb->s_blocksize;
2352
2353 while (cur <= end) {
2354 if (cur >= last_byte) {
Chris Masone6dcd2d2008-07-17 12:53:50 -04002355 if (tree->ops && tree->ops->writepage_end_io_hook)
2356 tree->ops->writepage_end_io_hook(page, cur,
2357 page_end, NULL, 1);
Chris Masond1310b22008-01-24 16:13:08 -05002358 break;
2359 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002360 em = epd->get_extent(inode, page, pg_offset, cur,
Chris Masond1310b22008-01-24 16:13:08 -05002361 end - cur + 1, 1);
2362 if (IS_ERR(em) || !em) {
2363 SetPageError(page);
2364 break;
2365 }
2366
2367 extent_offset = cur - em->start;
2368 BUG_ON(extent_map_end(em) <= cur);
2369 BUG_ON(end < cur);
2370 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2371 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2372 sector = (em->block_start + extent_offset) >> 9;
2373 bdev = em->bdev;
2374 block_start = em->block_start;
Chris Masonc8b97812008-10-29 14:49:59 -04002375 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
Chris Masond1310b22008-01-24 16:13:08 -05002376 free_extent_map(em);
2377 em = NULL;
2378
Chris Masonc8b97812008-10-29 14:49:59 -04002379 /*
2380 * compressed and inline extents are written through other
2381 * paths in the FS
2382 */
2383 if (compressed || block_start == EXTENT_MAP_HOLE ||
Chris Masond1310b22008-01-24 16:13:08 -05002384 block_start == EXTENT_MAP_INLINE) {
Chris Masonc8b97812008-10-29 14:49:59 -04002385 /*
2386 * end_io notification does not happen here for
2387 * compressed extents
2388 */
2389 if (!compressed && tree->ops &&
2390 tree->ops->writepage_end_io_hook)
Chris Masone6dcd2d2008-07-17 12:53:50 -04002391 tree->ops->writepage_end_io_hook(page, cur,
2392 cur + iosize - 1,
2393 NULL, 1);
Chris Masonc8b97812008-10-29 14:49:59 -04002394 else if (compressed) {
2395 /* we don't want to end_page_writeback on
2396 * a compressed extent. this happens
2397 * elsewhere
2398 */
2399 nr++;
2400 }
2401
2402 cur += iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002403 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002404 continue;
2405 }
Chris Masond1310b22008-01-24 16:13:08 -05002406 /* leave this out until we have a page_mkwrite call */
2407 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
Chris Mason9655d292009-09-02 15:22:30 -04002408 EXTENT_DIRTY, 0, NULL)) {
Chris Masond1310b22008-01-24 16:13:08 -05002409 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002410 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002411 continue;
2412 }
Chris Masonc8b97812008-10-29 14:49:59 -04002413
Chris Masond1310b22008-01-24 16:13:08 -05002414 if (tree->ops && tree->ops->writepage_io_hook) {
2415 ret = tree->ops->writepage_io_hook(page, cur,
2416 cur + iosize - 1);
2417 } else {
2418 ret = 0;
2419 }
Chris Mason1259ab72008-05-12 13:39:03 -04002420 if (ret) {
Chris Masond1310b22008-01-24 16:13:08 -05002421 SetPageError(page);
Chris Mason1259ab72008-05-12 13:39:03 -04002422 } else {
Chris Masond1310b22008-01-24 16:13:08 -05002423 unsigned long max_nr = end_index + 1;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002424
Chris Masond1310b22008-01-24 16:13:08 -05002425 set_range_writeback(tree, cur, cur + iosize - 1);
2426 if (!PageWriteback(page)) {
Chris Masond3977122009-01-05 21:25:51 -05002427 printk(KERN_ERR "btrfs warning page %lu not "
2428 "writeback, cur %llu end %llu\n",
2429 page->index, (unsigned long long)cur,
Chris Masond1310b22008-01-24 16:13:08 -05002430 (unsigned long long)end);
2431 }
2432
Chris Masonffbd5172009-04-20 15:50:09 -04002433 ret = submit_extent_page(write_flags, tree, page,
2434 sector, iosize, pg_offset,
2435 bdev, &epd->bio, max_nr,
Chris Masonc8b97812008-10-29 14:49:59 -04002436 end_bio_extent_writepage,
2437 0, 0, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002438 if (ret)
2439 SetPageError(page);
2440 }
2441 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002442 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002443 nr++;
2444 }
2445done:
2446 if (nr == 0) {
2447 /* make sure the mapping tag for page dirty gets cleared */
2448 set_page_writeback(page);
2449 end_page_writeback(page);
2450 }
Chris Masond1310b22008-01-24 16:13:08 -05002451 unlock_page(page);
Chris Mason771ed682008-11-06 22:02:51 -05002452
Chris Mason11c83492009-04-20 15:50:09 -04002453done_unlocked:
2454
Chris Mason2c64c532009-09-02 15:04:12 -04002455 /* drop our reference on any cached states */
2456 free_extent_state(cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05002457 return 0;
2458}
2459
Chris Masond1310b22008-01-24 16:13:08 -05002460/**
Chris Mason4bef0842008-09-08 11:18:08 -04002461 * extent_write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
Chris Masond1310b22008-01-24 16:13:08 -05002462 * @mapping: address space structure to write
2463 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2464 * @writepage: function called for each page
2465 * @data: data passed to writepage function
2466 *
 2467 * If a page is already under I/O, extent_write_cache_pages() skips it, even
2468 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
2469 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
2470 * and msync() need to guarantee that all the data which was dirty at the time
2471 * the call was made get new I/O started against them. If wbc->sync_mode is
2472 * WB_SYNC_ALL then we were called for data integrity and we must wait for
2473 * existing IO to complete.
2474 */
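/*
 * One extra twist below: when a data-integrity pass (sync_mode !=
 * WB_SYNC_NONE) finds a page still under writeback, flush_fn is called
 * first so any partially built bio in the extent_page_data is submitted
 * before we wait on the page.
 */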
Christoph Hellwigb2950862008-12-02 09:54:17 -05002475static int extent_write_cache_pages(struct extent_io_tree *tree,
Chris Mason4bef0842008-09-08 11:18:08 -04002476 struct address_space *mapping,
2477 struct writeback_control *wbc,
Chris Masond2c3f4f2008-11-19 12:44:22 -05002478 writepage_t writepage, void *data,
2479 void (*flush_fn)(void *))
Chris Masond1310b22008-01-24 16:13:08 -05002480{
Chris Masond1310b22008-01-24 16:13:08 -05002481 int ret = 0;
2482 int done = 0;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002483 int nr_to_write_done = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002484 struct pagevec pvec;
2485 int nr_pages;
2486 pgoff_t index;
2487 pgoff_t end; /* Inclusive */
2488 int scanned = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002489
Chris Masond1310b22008-01-24 16:13:08 -05002490 pagevec_init(&pvec, 0);
2491 if (wbc->range_cyclic) {
2492 index = mapping->writeback_index; /* Start from prev offset */
2493 end = -1;
2494 } else {
2495 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2496 end = wbc->range_end >> PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05002497 scanned = 1;
2498 }
2499retry:
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002500 while (!done && !nr_to_write_done && (index <= end) &&
Chris Masond1310b22008-01-24 16:13:08 -05002501 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
Chris Masond3977122009-01-05 21:25:51 -05002502 PAGECACHE_TAG_DIRTY, min(end - index,
2503 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
Chris Masond1310b22008-01-24 16:13:08 -05002504 unsigned i;
2505
2506 scanned = 1;
2507 for (i = 0; i < nr_pages; i++) {
2508 struct page *page = pvec.pages[i];
2509
2510 /*
2511 * At this point we hold neither mapping->tree_lock nor
2512 * lock on the page itself: the page may be truncated or
2513 * invalidated (changing page->mapping to NULL), or even
2514 * swizzled back from swapper_space to tmpfs file
2515 * mapping
2516 */
Chris Mason4bef0842008-09-08 11:18:08 -04002517 if (tree->ops && tree->ops->write_cache_pages_lock_hook)
2518 tree->ops->write_cache_pages_lock_hook(page);
2519 else
2520 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002521
2522 if (unlikely(page->mapping != mapping)) {
2523 unlock_page(page);
2524 continue;
2525 }
2526
2527 if (!wbc->range_cyclic && page->index > end) {
2528 done = 1;
2529 unlock_page(page);
2530 continue;
2531 }
2532
Chris Masond2c3f4f2008-11-19 12:44:22 -05002533 if (wbc->sync_mode != WB_SYNC_NONE) {
Chris Mason0e6bd952008-11-20 10:46:35 -05002534 if (PageWriteback(page))
2535 flush_fn(data);
Chris Masond1310b22008-01-24 16:13:08 -05002536 wait_on_page_writeback(page);
Chris Masond2c3f4f2008-11-19 12:44:22 -05002537 }
Chris Masond1310b22008-01-24 16:13:08 -05002538
2539 if (PageWriteback(page) ||
2540 !clear_page_dirty_for_io(page)) {
2541 unlock_page(page);
2542 continue;
2543 }
2544
2545 ret = (*writepage)(page, wbc, data);
2546
2547 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
2548 unlock_page(page);
2549 ret = 0;
2550 }
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002551 if (ret)
Chris Masond1310b22008-01-24 16:13:08 -05002552 done = 1;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04002553
2554 /*
2555 * the filesystem may choose to bump up nr_to_write.
2556 * We have to make sure to honor the new nr_to_write
2557 * at any time
2558 */
2559 nr_to_write_done = wbc->nr_to_write <= 0;
Chris Masond1310b22008-01-24 16:13:08 -05002560 }
2561 pagevec_release(&pvec);
2562 cond_resched();
2563 }
2564 if (!scanned && !done) {
2565 /*
2566 * We hit the last page and there is more work to be done: wrap
2567 * back to the start of the file
2568 */
2569 scanned = 1;
2570 index = 0;
2571 goto retry;
2572 }
Chris Masond1310b22008-01-24 16:13:08 -05002573 return ret;
2574}
Chris Masond1310b22008-01-24 16:13:08 -05002575
Chris Masonffbd5172009-04-20 15:50:09 -04002576static void flush_epd_write_bio(struct extent_page_data *epd)
2577{
2578 if (epd->bio) {
2579 if (epd->sync_io)
2580 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
2581 else
2582 submit_one_bio(WRITE, epd->bio, 0, 0);
2583 epd->bio = NULL;
2584 }
2585}
2586
Chris Masond2c3f4f2008-11-19 12:44:22 -05002587static noinline void flush_write_bio(void *data)
2588{
2589 struct extent_page_data *epd = data;
Chris Masonffbd5172009-04-20 15:50:09 -04002590 flush_epd_write_bio(epd);
Chris Masond2c3f4f2008-11-19 12:44:22 -05002591}
2592
Chris Masond1310b22008-01-24 16:13:08 -05002593int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2594 get_extent_t *get_extent,
2595 struct writeback_control *wbc)
2596{
2597 int ret;
2598 struct address_space *mapping = page->mapping;
2599 struct extent_page_data epd = {
2600 .bio = NULL,
2601 .tree = tree,
2602 .get_extent = get_extent,
Chris Mason771ed682008-11-06 22:02:51 -05002603 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04002604 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05002605 };
2606 struct writeback_control wbc_writepages = {
Chris Masond313d7a2009-04-20 15:50:09 -04002607 .sync_mode = wbc->sync_mode,
Chris Masond1310b22008-01-24 16:13:08 -05002608 .older_than_this = NULL,
2609 .nr_to_write = 64,
2610 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2611 .range_end = (loff_t)-1,
2612 };
2613
Chris Masond1310b22008-01-24 16:13:08 -05002614 ret = __extent_writepage(page, wbc, &epd);
2615
Chris Mason4bef0842008-09-08 11:18:08 -04002616 extent_write_cache_pages(tree, mapping, &wbc_writepages,
Chris Masond2c3f4f2008-11-19 12:44:22 -05002617 __extent_writepage, &epd, flush_write_bio);
Chris Masonffbd5172009-04-20 15:50:09 -04002618 flush_epd_write_bio(&epd);
Chris Masond1310b22008-01-24 16:13:08 -05002619 return ret;
2620}
Chris Masond1310b22008-01-24 16:13:08 -05002621
Chris Mason771ed682008-11-06 22:02:51 -05002622int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2623 u64 start, u64 end, get_extent_t *get_extent,
2624 int mode)
2625{
2626 int ret = 0;
2627 struct address_space *mapping = inode->i_mapping;
2628 struct page *page;
2629 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
2630 PAGE_CACHE_SHIFT;
2631
2632 struct extent_page_data epd = {
2633 .bio = NULL,
2634 .tree = tree,
2635 .get_extent = get_extent,
2636 .extent_locked = 1,
Chris Masonffbd5172009-04-20 15:50:09 -04002637 .sync_io = mode == WB_SYNC_ALL,
Chris Mason771ed682008-11-06 22:02:51 -05002638 };
2639 struct writeback_control wbc_writepages = {
Chris Mason771ed682008-11-06 22:02:51 -05002640 .sync_mode = mode,
2641 .older_than_this = NULL,
2642 .nr_to_write = nr_pages * 2,
2643 .range_start = start,
2644 .range_end = end + 1,
2645 };
2646
Chris Masond3977122009-01-05 21:25:51 -05002647 while (start <= end) {
Chris Mason771ed682008-11-06 22:02:51 -05002648 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
2649 if (clear_page_dirty_for_io(page))
2650 ret = __extent_writepage(page, &wbc_writepages, &epd);
2651 else {
2652 if (tree->ops && tree->ops->writepage_end_io_hook)
2653 tree->ops->writepage_end_io_hook(page, start,
2654 start + PAGE_CACHE_SIZE - 1,
2655 NULL, 1);
2656 unlock_page(page);
2657 }
2658 page_cache_release(page);
2659 start += PAGE_CACHE_SIZE;
2660 }
2661
Chris Masonffbd5172009-04-20 15:50:09 -04002662 flush_epd_write_bio(&epd);
Chris Mason771ed682008-11-06 22:02:51 -05002663 return ret;
2664}
Chris Masond1310b22008-01-24 16:13:08 -05002665
2666int extent_writepages(struct extent_io_tree *tree,
2667 struct address_space *mapping,
2668 get_extent_t *get_extent,
2669 struct writeback_control *wbc)
2670{
2671 int ret = 0;
2672 struct extent_page_data epd = {
2673 .bio = NULL,
2674 .tree = tree,
2675 .get_extent = get_extent,
Chris Mason771ed682008-11-06 22:02:51 -05002676 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04002677 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05002678 };
2679
Chris Mason4bef0842008-09-08 11:18:08 -04002680 ret = extent_write_cache_pages(tree, mapping, wbc,
Chris Masond2c3f4f2008-11-19 12:44:22 -05002681 __extent_writepage, &epd,
2682 flush_write_bio);
Chris Masonffbd5172009-04-20 15:50:09 -04002683 flush_epd_write_bio(&epd);
Chris Masond1310b22008-01-24 16:13:08 -05002684 return ret;
2685}
Chris Masond1310b22008-01-24 16:13:08 -05002686
2687int extent_readpages(struct extent_io_tree *tree,
2688 struct address_space *mapping,
2689 struct list_head *pages, unsigned nr_pages,
2690 get_extent_t get_extent)
2691{
2692 struct bio *bio = NULL;
2693 unsigned page_idx;
Chris Masonc8b97812008-10-29 14:49:59 -04002694 unsigned long bio_flags = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002695
Chris Masond1310b22008-01-24 16:13:08 -05002696 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2697 struct page *page = list_entry(pages->prev, struct page, lru);
2698
2699 prefetchw(&page->flags);
2700 list_del(&page->lru);
Nick Piggin28ecb6092010-03-17 13:31:04 +00002701 if (!add_to_page_cache_lru(page, mapping,
Itaru Kitayama43e817a2011-04-25 19:43:51 -04002702 page->index, GFP_NOFS)) {
Chris Masonf1885912008-04-09 16:28:12 -04002703 __extent_read_full_page(tree, page, get_extent,
Chris Masonc8b97812008-10-29 14:49:59 -04002704 &bio, 0, &bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002705 }
2706 page_cache_release(page);
2707 }
Chris Masond1310b22008-01-24 16:13:08 -05002708 BUG_ON(!list_empty(pages));
2709 if (bio)
Chris Masonc8b97812008-10-29 14:49:59 -04002710 submit_one_bio(READ, bio, 0, bio_flags);
Chris Masond1310b22008-01-24 16:13:08 -05002711 return 0;
2712}
Chris Masond1310b22008-01-24 16:13:08 -05002713
2714/*
2715 * basic invalidatepage code: wait on any locked or writeback
2716 * ranges corresponding to the page, then delete any extent state
2717 * records from the tree
2718 */
2719int extent_invalidatepage(struct extent_io_tree *tree,
2720 struct page *page, unsigned long offset)
2721{
Josef Bacik2ac55d42010-02-03 19:33:23 +00002722 struct extent_state *cached_state = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002723 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2724 u64 end = start + PAGE_CACHE_SIZE - 1;
2725 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2726
Chris Masond3977122009-01-05 21:25:51 -05002727 start += (offset + blocksize - 1) & ~(blocksize - 1);
Chris Masond1310b22008-01-24 16:13:08 -05002728 if (start > end)
2729 return 0;
2730
Josef Bacik2ac55d42010-02-03 19:33:23 +00002731 lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS);
Chris Mason1edbb732009-09-02 13:24:36 -04002732 wait_on_page_writeback(page);
Chris Masond1310b22008-01-24 16:13:08 -05002733 clear_extent_bit(tree, start, end,
Josef Bacik32c00af2009-10-08 13:34:05 -04002734 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
2735 EXTENT_DO_ACCOUNTING,
Josef Bacik2ac55d42010-02-03 19:33:23 +00002736 1, 1, &cached_state, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002737 return 0;
2738}
Chris Masond1310b22008-01-24 16:13:08 -05002739
2740/*
2741 * simple commit_write call: set_page_dirty marks the page dirty and
2742 * i_size is updated when the write extends the file
2743 */
2744int extent_commit_write(struct extent_io_tree *tree,
2745 struct inode *inode, struct page *page,
2746 unsigned from, unsigned to)
2747{
2748 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2749
2750 set_page_extent_mapped(page);
2751 set_page_dirty(page);
2752
2753 if (pos > inode->i_size) {
2754 i_size_write(inode, pos);
2755 mark_inode_dirty(inode);
2756 }
2757 return 0;
2758}
Chris Masond1310b22008-01-24 16:13:08 -05002759
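/*
 * prepare_write: get the blocks covered by a write from 'from' to 'to'
 * ready.  New blocks that extend past the write are zeroed in the page,
 * existing blocks that are only partially covered and not uptodate are
 * read from disk, and everything else is just marked uptodate in the
 * extent tree.
 */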
2760int extent_prepare_write(struct extent_io_tree *tree,
2761 struct inode *inode, struct page *page,
2762 unsigned from, unsigned to, get_extent_t *get_extent)
2763{
2764 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2765 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2766 u64 block_start;
2767 u64 orig_block_start;
2768 u64 block_end;
2769 u64 cur_end;
2770 struct extent_map *em;
2771 unsigned blocksize = 1 << inode->i_blkbits;
2772 size_t page_offset = 0;
2773 size_t block_off_start;
2774 size_t block_off_end;
2775 int err = 0;
2776 int iocount = 0;
2777 int ret = 0;
2778 int isnew;
2779
2780 set_page_extent_mapped(page);
2781
2782 block_start = (page_start + from) & ~((u64)blocksize - 1);
2783 block_end = (page_start + to - 1) | (blocksize - 1);
2784 orig_block_start = block_start;
2785
2786 lock_extent(tree, page_start, page_end, GFP_NOFS);
Chris Masond3977122009-01-05 21:25:51 -05002787 while (block_start <= block_end) {
Chris Masond1310b22008-01-24 16:13:08 -05002788 em = get_extent(inode, page, page_offset, block_start,
2789 block_end - block_start + 1, 1);
Chris Masond3977122009-01-05 21:25:51 -05002790 if (IS_ERR(em) || !em)
Chris Masond1310b22008-01-24 16:13:08 -05002791 goto err;
Chris Masond3977122009-01-05 21:25:51 -05002792
Chris Masond1310b22008-01-24 16:13:08 -05002793 cur_end = min(block_end, extent_map_end(em) - 1);
2794 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2795 block_off_end = block_off_start + blocksize;
2796 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2797
2798 if (!PageUptodate(page) && isnew &&
2799 (block_off_end > to || block_off_start < from)) {
2800 void *kaddr;
2801
2802 kaddr = kmap_atomic(page, KM_USER0);
2803 if (block_off_end > to)
2804 memset(kaddr + to, 0, block_off_end - to);
2805 if (block_off_start < from)
2806 memset(kaddr + block_off_start, 0,
2807 from - block_off_start);
2808 flush_dcache_page(page);
2809 kunmap_atomic(kaddr, KM_USER0);
2810 }
2811 if ((em->block_start != EXTENT_MAP_HOLE &&
2812 em->block_start != EXTENT_MAP_INLINE) &&
2813 !isnew && !PageUptodate(page) &&
2814 (block_off_end > to || block_off_start < from) &&
2815 !test_range_bit(tree, block_start, cur_end,
Chris Mason9655d292009-09-02 15:22:30 -04002816 EXTENT_UPTODATE, 1, NULL)) {
Chris Masond1310b22008-01-24 16:13:08 -05002817 u64 sector;
2818 u64 extent_offset = block_start - em->start;
2819 size_t iosize;
2820 sector = (em->block_start + extent_offset) >> 9;
2821 iosize = (cur_end - block_start + blocksize) &
2822 ~((u64)blocksize - 1);
2823 /*
2824 * we've already got the extent locked, but we
2825 * need to split the state such that our end_bio
2826 * handler can clear the lock.
2827 */
2828 set_extent_bit(tree, block_start,
2829 block_start + iosize - 1,
Chris Mason2c64c532009-09-02 15:04:12 -04002830 EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002831 ret = submit_extent_page(READ, tree, page,
2832 sector, iosize, page_offset, em->bdev,
2833 NULL, 1,
Chris Masonc8b97812008-10-29 14:49:59 -04002834 end_bio_extent_preparewrite, 0,
2835 0, 0);
Andi Kleen411fc6b2010-10-29 15:14:31 -04002836 if (ret && !err)
2837 err = ret;
Chris Masond1310b22008-01-24 16:13:08 -05002838 iocount++;
2839 block_start = block_start + iosize;
2840 } else {
Arne Jansen507903b2011-04-06 10:02:20 +00002841 struct extent_state *cached = NULL;
2842
2843 set_extent_uptodate(tree, block_start, cur_end, &cached,
Chris Masond1310b22008-01-24 16:13:08 -05002844 GFP_NOFS);
Arne Jansen507903b2011-04-06 10:02:20 +00002845 unlock_extent_cached(tree, block_start, cur_end,
2846 &cached, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002847 block_start = cur_end + 1;
2848 }
2849 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2850 free_extent_map(em);
2851 }
2852 if (iocount) {
2853 wait_extent_bit(tree, orig_block_start,
2854 block_end, EXTENT_LOCKED);
2855 }
2856 check_page_uptodate(tree, page);
2857err:
2858 /* FIXME, zero out newly allocated blocks on error */
2859 return err;
2860}
Chris Masond1310b22008-01-24 16:13:08 -05002861
2862/*
Chris Mason7b13b7b2008-04-18 10:29:50 -04002863 * a helper for releasepage: test whether any part of the page is
2864 * locked or under IO, and drop the related state bits if it is safe
2865 * to drop the page.
2866 */
2867int try_release_extent_state(struct extent_map_tree *map,
2868 struct extent_io_tree *tree, struct page *page,
2869 gfp_t mask)
2870{
2871 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2872 u64 end = start + PAGE_CACHE_SIZE - 1;
2873 int ret = 1;
2874
Chris Mason211f90e2008-07-18 11:56:15 -04002875 if (test_range_bit(tree, start, end,
Chris Mason8b62b722009-09-02 16:53:46 -04002876 EXTENT_IOBITS, 0, NULL))
Chris Mason7b13b7b2008-04-18 10:29:50 -04002877 ret = 0;
2878 else {
2879 if ((mask & GFP_NOFS) == GFP_NOFS)
2880 mask = GFP_NOFS;
Chris Mason11ef1602009-09-23 20:28:46 -04002881 /*
2882 * at this point we can safely clear everything except the
2883 * locked bit and the nodatasum bit
2884 */
Chris Masone3f24cc2011-02-14 12:52:08 -05002885 ret = clear_extent_bit(tree, start, end,
Chris Mason11ef1602009-09-23 20:28:46 -04002886 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2887 0, 0, NULL, mask);
Chris Masone3f24cc2011-02-14 12:52:08 -05002888
2889 /* if clear_extent_bit failed for enomem reasons,
2890 * we can't allow the release to continue.
2891 */
2892 if (ret < 0)
2893 ret = 0;
2894 else
2895 ret = 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04002896 }
2897 return ret;
2898}
Chris Mason7b13b7b2008-04-18 10:29:50 -04002899
2900/*
Chris Masond1310b22008-01-24 16:13:08 -05002901 * a helper for releasepage. As long as there are no locked extents
2902 * in the range corresponding to the page, both state records and extent
2903 * map records are removed
2904 */
2905int try_release_extent_mapping(struct extent_map_tree *map,
Chris Mason70dec802008-01-29 09:59:12 -05002906 struct extent_io_tree *tree, struct page *page,
2907 gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -05002908{
2909 struct extent_map *em;
2910 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2911 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04002912
Chris Mason70dec802008-01-29 09:59:12 -05002913 if ((mask & __GFP_WAIT) &&
2914 page->mapping->host->i_size > 16 * 1024 * 1024) {
Yan39b56372008-02-15 10:40:50 -05002915 u64 len;
Chris Mason70dec802008-01-29 09:59:12 -05002916 while (start <= end) {
Yan39b56372008-02-15 10:40:50 -05002917 len = end - start + 1;
Chris Mason890871b2009-09-02 16:24:52 -04002918 write_lock(&map->lock);
Yan39b56372008-02-15 10:40:50 -05002919 em = lookup_extent_mapping(map, start, len);
Chris Mason70dec802008-01-29 09:59:12 -05002920 if (!em || IS_ERR(em)) {
Chris Mason890871b2009-09-02 16:24:52 -04002921 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05002922 break;
2923 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002924 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2925 em->start != start) {
Chris Mason890871b2009-09-02 16:24:52 -04002926 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05002927 free_extent_map(em);
2928 break;
2929 }
2930 if (!test_range_bit(tree, em->start,
2931 extent_map_end(em) - 1,
Chris Mason8b62b722009-09-02 16:53:46 -04002932 EXTENT_LOCKED | EXTENT_WRITEBACK,
Chris Mason9655d292009-09-02 15:22:30 -04002933 0, NULL)) {
Chris Mason70dec802008-01-29 09:59:12 -05002934 remove_extent_mapping(map, em);
2935 /* once for the rb tree */
2936 free_extent_map(em);
2937 }
2938 start = extent_map_end(em);
Chris Mason890871b2009-09-02 16:24:52 -04002939 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05002940
2941 /* once for us */
Chris Masond1310b22008-01-24 16:13:08 -05002942 free_extent_map(em);
2943 }
Chris Masond1310b22008-01-24 16:13:08 -05002944 }
Chris Mason7b13b7b2008-04-18 10:29:50 -04002945 return try_release_extent_state(map, tree, page, mask);
Chris Masond1310b22008-01-24 16:13:08 -05002946}
Chris Masond1310b22008-01-24 16:13:08 -05002947
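/*
 * bmap call: translate a logical file block into a physical block
 * number via the extent map.  Holes, inline and delalloc extents
 * translate to 0.
 */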
2948sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2949 get_extent_t *get_extent)
2950{
2951 struct inode *inode = mapping->host;
Josef Bacik2ac55d42010-02-03 19:33:23 +00002952 struct extent_state *cached_state = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002953 u64 start = iblock << inode->i_blkbits;
2954 sector_t sector = 0;
Yan Zhengd899e052008-10-30 14:25:28 -04002955 size_t blksize = (1 << inode->i_blkbits);
Chris Masond1310b22008-01-24 16:13:08 -05002956 struct extent_map *em;
2957
Josef Bacik2ac55d42010-02-03 19:33:23 +00002958 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2959 0, &cached_state, GFP_NOFS);
Yan Zhengd899e052008-10-30 14:25:28 -04002960 em = get_extent(inode, NULL, 0, start, blksize, 0);
Josef Bacik2ac55d42010-02-03 19:33:23 +00002961 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
2962 start + blksize - 1, &cached_state, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002963 if (!em || IS_ERR(em))
2964 return 0;
2965
Yan Zhengd899e052008-10-30 14:25:28 -04002966 if (em->block_start > EXTENT_MAP_LAST_BYTE)
Chris Masond1310b22008-01-24 16:13:08 -05002967 goto out;
2968
2969 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
Chris Masond1310b22008-01-24 16:13:08 -05002970out:
2971 free_extent_map(em);
2972 return sector;
2973}
2974
Chris Masonec29ed52011-02-23 16:23:20 -05002975/*
2976 * helper function for fiemap, which doesn't want to see any holes.
2977 * This maps until we find something past 'last'
2978 */
2979static struct extent_map *get_extent_skip_holes(struct inode *inode,
2980 u64 offset,
2981 u64 last,
2982 get_extent_t *get_extent)
2983{
2984 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2985 struct extent_map *em;
2986 u64 len;
2987
2988 if (offset >= last)
2989 return NULL;
2990
2991	while (1) {
2992 len = last - offset;
2993 if (len == 0)
2994 break;
2995 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2996 em = get_extent(inode, NULL, 0, offset, len, 0);
2997 if (!em || IS_ERR(em))
2998 return em;
2999
3000 /* if this isn't a hole return it */
3001 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
3002 em->block_start != EXTENT_MAP_HOLE) {
3003 return em;
3004 }
3005
3006 /* this is a hole, advance to the next extent */
3007 offset = extent_map_end(em);
3008 free_extent_map(em);
3009 if (offset >= last)
3010 break;
3011 }
3012 return NULL;
3013}
3014
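/*
 * fiemap call: walk the extent maps in [start, start + len) and report
 * each non-hole extent through fiemap_fill_next_extent().  The last
 * on-disk file extent is looked up first so FIEMAP_EXTENT_LAST can be
 * set correctly even when preallocated extents reach past i_size.
 */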
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003015int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3016 __u64 start, __u64 len, get_extent_t *get_extent)
3017{
Josef Bacik975f84f2010-11-23 19:36:57 +00003018 int ret = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003019 u64 off = start;
3020 u64 max = start + len;
3021 u32 flags = 0;
Josef Bacik975f84f2010-11-23 19:36:57 +00003022 u32 found_type;
3023 u64 last;
Chris Masonec29ed52011-02-23 16:23:20 -05003024 u64 last_for_get_extent = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003025 u64 disko = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05003026 u64 isize = i_size_read(inode);
Josef Bacik975f84f2010-11-23 19:36:57 +00003027 struct btrfs_key found_key;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003028 struct extent_map *em = NULL;
Josef Bacik2ac55d42010-02-03 19:33:23 +00003029 struct extent_state *cached_state = NULL;
Josef Bacik975f84f2010-11-23 19:36:57 +00003030 struct btrfs_path *path;
3031 struct btrfs_file_extent_item *item;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003032 int end = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05003033 u64 em_start = 0;
3034 u64 em_len = 0;
3035 u64 em_end = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003036 unsigned long emflags;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003037
3038 if (len == 0)
3039 return -EINVAL;
3040
Josef Bacik975f84f2010-11-23 19:36:57 +00003041 path = btrfs_alloc_path();
3042 if (!path)
3043 return -ENOMEM;
3044 path->leave_spinning = 1;
3045
Chris Masonec29ed52011-02-23 16:23:20 -05003046 /*
3047 * lookup the last file extent. We're not using i_size here
3048 * because there might be preallocation past i_size
3049 */
Josef Bacik975f84f2010-11-23 19:36:57 +00003050 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
3051 path, inode->i_ino, -1, 0);
3052 if (ret < 0) {
3053 btrfs_free_path(path);
3054 return ret;
3055 }
3056 WARN_ON(!ret);
3057 path->slots[0]--;
3058 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3059 struct btrfs_file_extent_item);
3060 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
3061 found_type = btrfs_key_type(&found_key);
3062
Chris Masonec29ed52011-02-23 16:23:20 -05003063 /* No extents, but there might be delalloc bits */
Josef Bacik975f84f2010-11-23 19:36:57 +00003064 if (found_key.objectid != inode->i_ino ||
3065 found_type != BTRFS_EXTENT_DATA_KEY) {
Chris Masonec29ed52011-02-23 16:23:20 -05003066 /* have to trust i_size as the end */
3067 last = (u64)-1;
3068 last_for_get_extent = isize;
3069 } else {
3070 /*
3071 * remember the start of the last extent. There are a
3072 * bunch of different factors that go into the length of the
3073	 * extent, so it's much less complex to remember where it started
3074 */
3075 last = found_key.offset;
3076 last_for_get_extent = last + 1;
Josef Bacik975f84f2010-11-23 19:36:57 +00003077 }
Josef Bacik975f84f2010-11-23 19:36:57 +00003078 btrfs_free_path(path);
3079
Chris Masonec29ed52011-02-23 16:23:20 -05003080 /*
3081 * we might have some extents allocated but more delalloc past those
3082 * extents. so, we trust isize unless the start of the last extent is
3083 * beyond isize
3084 */
3085 if (last < isize) {
3086 last = (u64)-1;
3087 last_for_get_extent = isize;
3088 }
3089
Josef Bacik2ac55d42010-02-03 19:33:23 +00003090 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
3091 &cached_state, GFP_NOFS);
Chris Masonec29ed52011-02-23 16:23:20 -05003092
3093 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3094 get_extent);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003095 if (!em)
3096 goto out;
3097 if (IS_ERR(em)) {
3098 ret = PTR_ERR(em);
3099 goto out;
3100 }
Josef Bacik975f84f2010-11-23 19:36:57 +00003101
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003102 while (!end) {
Chris Masonea8efc72011-03-08 11:54:40 -05003103 u64 offset_in_extent;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003104
Chris Masonea8efc72011-03-08 11:54:40 -05003105 /* break if the extent we found is outside the range */
3106 if (em->start >= max || extent_map_end(em) < off)
3107 break;
3108
3109 /*
3110 * get_extent may return an extent that starts before our
3111 * requested range. We have to make sure the ranges
3112 * we return to fiemap always move forward and don't
3113 * overlap, so adjust the offsets here
3114 */
3115 em_start = max(em->start, off);
3116
3117 /*
3118 * record the offset from the start of the extent
3119 * for adjusting the disk offset below
3120 */
3121 offset_in_extent = em_start - em->start;
Chris Masonec29ed52011-02-23 16:23:20 -05003122 em_end = extent_map_end(em);
Chris Masonea8efc72011-03-08 11:54:40 -05003123 em_len = em_end - em_start;
Chris Masonec29ed52011-02-23 16:23:20 -05003124 emflags = em->flags;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003125 disko = 0;
3126 flags = 0;
3127
Chris Masonea8efc72011-03-08 11:54:40 -05003128 /*
3129 * bump off for our next call to get_extent
3130 */
3131 off = extent_map_end(em);
3132 if (off >= max)
3133 end = 1;
3134
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003135 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003136 end = 1;
3137 flags |= FIEMAP_EXTENT_LAST;
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003138 } else if (em->block_start == EXTENT_MAP_INLINE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003139 flags |= (FIEMAP_EXTENT_DATA_INLINE |
3140 FIEMAP_EXTENT_NOT_ALIGNED);
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003141 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003142 flags |= (FIEMAP_EXTENT_DELALLOC |
3143 FIEMAP_EXTENT_UNKNOWN);
Heiko Carstens93dbfad2009-04-03 10:33:45 -04003144 } else {
Chris Masonea8efc72011-03-08 11:54:40 -05003145 disko = em->block_start + offset_in_extent;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003146 }
3147 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3148 flags |= FIEMAP_EXTENT_ENCODED;
3149
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003150 free_extent_map(em);
3151 em = NULL;
Chris Masonec29ed52011-02-23 16:23:20 -05003152 if ((em_start >= last) || em_len == (u64)-1 ||
3153 (last == (u64)-1 && isize <= em_end)) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003154 flags |= FIEMAP_EXTENT_LAST;
3155 end = 1;
3156 }
3157
Chris Masonec29ed52011-02-23 16:23:20 -05003158 /* now scan forward to see if this is really the last extent. */
3159 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3160 get_extent);
3161 if (IS_ERR(em)) {
3162 ret = PTR_ERR(em);
3163 goto out;
3164 }
3165 if (!em) {
Josef Bacik975f84f2010-11-23 19:36:57 +00003166 flags |= FIEMAP_EXTENT_LAST;
3167 end = 1;
3168 }
Chris Masonec29ed52011-02-23 16:23:20 -05003169 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3170 em_len, flags);
3171 if (ret)
3172 goto out_free;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003173 }
3174out_free:
3175 free_extent_map(em);
3176out:
Josef Bacik2ac55d42010-02-03 19:33:23 +00003177 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
3178 &cached_state, GFP_NOFS);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05003179 return ret;
3180}
3181
Chris Masond1310b22008-01-24 16:13:08 -05003182static inline struct page *extent_buffer_page(struct extent_buffer *eb,
3183 unsigned long i)
3184{
3185 struct page *p;
3186 struct address_space *mapping;
3187
3188 if (i == 0)
3189 return eb->first_page;
3190 i += eb->start >> PAGE_CACHE_SHIFT;
3191 mapping = eb->first_page->mapping;
Chris Mason33958dc2008-07-30 10:29:12 -04003192 if (!mapping)
3193 return NULL;
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003194
3195 /*
3196 * extent_buffer_page is only called after pinning the page
3197 * by increasing the reference count. So we know the page must
3198 * be in the radix tree.
3199 */
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003200 rcu_read_lock();
Chris Masond1310b22008-01-24 16:13:08 -05003201 p = radix_tree_lookup(&mapping->page_tree, i);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003202 rcu_read_unlock();
Chris Mason2b1f55b2008-09-24 11:48:04 -04003203
Chris Masond1310b22008-01-24 16:13:08 -05003204 return p;
3205}
3206
Chris Mason6af118ce2008-07-22 11:18:07 -04003207static inline unsigned long num_extent_pages(u64 start, u64 len)
Chris Masonce9adaa2008-04-09 16:28:12 -04003208{
Chris Mason6af118ce2008-07-22 11:18:07 -04003209 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
3210 (start >> PAGE_CACHE_SHIFT);
Chris Mason728131d2008-04-09 16:28:12 -04003211}
3212
Chris Masond1310b22008-01-24 16:13:08 -05003213static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3214 u64 start,
3215 unsigned long len,
3216 gfp_t mask)
3217{
3218 struct extent_buffer *eb = NULL;
Chris Mason39351272009-02-04 09:24:05 -05003219#if LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04003220 unsigned long flags;
Chris Mason4bef0842008-09-08 11:18:08 -04003221#endif
Chris Masond1310b22008-01-24 16:13:08 -05003222
Chris Masond1310b22008-01-24 16:13:08 -05003223 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00003224 if (eb == NULL)
3225 return NULL;
Chris Masond1310b22008-01-24 16:13:08 -05003226 eb->start = start;
3227 eb->len = len;
Chris Masonb4ce94d2009-02-04 09:25:08 -05003228 spin_lock_init(&eb->lock);
3229 init_waitqueue_head(&eb->lock_wq);
3230
Chris Mason39351272009-02-04 09:24:05 -05003231#if LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04003232 spin_lock_irqsave(&leak_lock, flags);
3233 list_add(&eb->leak_list, &buffers);
3234 spin_unlock_irqrestore(&leak_lock, flags);
Chris Mason4bef0842008-09-08 11:18:08 -04003235#endif
Chris Masond1310b22008-01-24 16:13:08 -05003236 atomic_set(&eb->refs, 1);
3237
3238 return eb;
3239}
3240
3241static void __free_extent_buffer(struct extent_buffer *eb)
3242{
Chris Mason39351272009-02-04 09:24:05 -05003243#if LEAK_DEBUG
Chris Mason2d2ae542008-03-26 16:24:23 -04003244 unsigned long flags;
3245 spin_lock_irqsave(&leak_lock, flags);
3246 list_del(&eb->leak_list);
3247 spin_unlock_irqrestore(&leak_lock, flags);
Chris Mason4bef0842008-09-08 11:18:08 -04003248#endif
Chris Masond1310b22008-01-24 16:13:08 -05003249 kmem_cache_free(extent_buffer_cache, eb);
3250}
3251
Miao Xie897ca6e92010-10-26 20:57:29 -04003252/*
3253 * Helper for releasing extent buffer page.
3254 */
3255static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3256 unsigned long start_idx)
3257{
3258 unsigned long index;
3259 struct page *page;
3260
3261 if (!eb->first_page)
3262 return;
3263
3264 index = num_extent_pages(eb->start, eb->len);
3265 if (start_idx >= index)
3266 return;
3267
3268 do {
3269 index--;
3270 page = extent_buffer_page(eb, index);
3271 if (page)
3272 page_cache_release(page);
3273 } while (index != start_idx);
3274}
3275
3276/*
3277 * Helper for releasing the extent buffer.
3278 */
3279static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
3280{
3281 btrfs_release_extent_buffer_page(eb, 0);
3282 __free_extent_buffer(eb);
3283}
3284
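/*
 * find or create the extent buffer for [start, start + len).  A buffer
 * already in the radix tree just gets another reference; otherwise the
 * pages are pinned and a new buffer is inserted, keeping the first page
 * locked until the buffer is visible in the tree to avoid racing with
 * releasepage.
 */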
Chris Masond1310b22008-01-24 16:13:08 -05003285struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3286 u64 start, unsigned long len,
3287 struct page *page0,
3288 gfp_t mask)
3289{
3290 unsigned long num_pages = num_extent_pages(start, len);
3291 unsigned long i;
3292 unsigned long index = start >> PAGE_CACHE_SHIFT;
3293 struct extent_buffer *eb;
Chris Mason6af118ce2008-07-22 11:18:07 -04003294 struct extent_buffer *exists = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05003295 struct page *p;
3296 struct address_space *mapping = tree->mapping;
3297 int uptodate = 1;
Miao Xie19fe0a82010-10-26 20:57:29 -04003298 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05003299
Miao Xie19fe0a82010-10-26 20:57:29 -04003300 rcu_read_lock();
3301 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3302 if (eb && atomic_inc_not_zero(&eb->refs)) {
3303 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -04003304 mark_page_accessed(eb->first_page);
Chris Mason6af118ce2008-07-22 11:18:07 -04003305 return eb;
3306 }
Miao Xie19fe0a82010-10-26 20:57:29 -04003307 rcu_read_unlock();
Chris Mason6af118ce2008-07-22 11:18:07 -04003308
Chris Masond1310b22008-01-24 16:13:08 -05003309 eb = __alloc_extent_buffer(tree, start, len, mask);
Peter2b114d12008-04-01 11:21:40 -04003310 if (!eb)
Chris Masond1310b22008-01-24 16:13:08 -05003311 return NULL;
3312
Chris Masond1310b22008-01-24 16:13:08 -05003313 if (page0) {
3314 eb->first_page = page0;
3315 i = 1;
3316 index++;
3317 page_cache_get(page0);
3318 mark_page_accessed(page0);
3319 set_page_extent_mapped(page0);
Chris Masond1310b22008-01-24 16:13:08 -05003320 set_page_extent_head(page0, len);
Chris Masonf1885912008-04-09 16:28:12 -04003321 uptodate = PageUptodate(page0);
Chris Masond1310b22008-01-24 16:13:08 -05003322 } else {
3323 i = 0;
3324 }
3325 for (; i < num_pages; i++, index++) {
3326 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
3327 if (!p) {
3328 WARN_ON(1);
Chris Mason6af118ce2008-07-22 11:18:07 -04003329 goto free_eb;
Chris Masond1310b22008-01-24 16:13:08 -05003330 }
3331 set_page_extent_mapped(p);
3332 mark_page_accessed(p);
3333 if (i == 0) {
3334 eb->first_page = p;
3335 set_page_extent_head(p, len);
3336 } else {
3337 set_page_private(p, EXTENT_PAGE_PRIVATE);
3338 }
3339 if (!PageUptodate(p))
3340 uptodate = 0;
Chris Masoneb14ab82011-02-10 12:35:00 -05003341
3342 /*
3343 * see below about how we avoid a nasty race with release page
3344 * and why we unlock later
3345 */
3346 if (i != 0)
3347 unlock_page(p);
Chris Masond1310b22008-01-24 16:13:08 -05003348 }
3349 if (uptodate)
Chris Masonb4ce94d2009-02-04 09:25:08 -05003350 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masond1310b22008-01-24 16:13:08 -05003351
Miao Xie19fe0a82010-10-26 20:57:29 -04003352 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
3353 if (ret)
3354 goto free_eb;
3355
Chris Mason6af118ce2008-07-22 11:18:07 -04003356 spin_lock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04003357 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
3358 if (ret == -EEXIST) {
3359 exists = radix_tree_lookup(&tree->buffer,
3360 start >> PAGE_CACHE_SHIFT);
Chris Mason6af118ce2008-07-22 11:18:07 -04003361 /* add one reference for the caller */
3362 atomic_inc(&exists->refs);
3363 spin_unlock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04003364 radix_tree_preload_end();
Chris Mason6af118ce2008-07-22 11:18:07 -04003365 goto free_eb;
3366 }
Chris Mason6af118ce2008-07-22 11:18:07 -04003367 /* add one reference for the tree */
3368 atomic_inc(&eb->refs);
Yan, Zhengf044ba72010-02-04 08:46:56 +00003369 spin_unlock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04003370 radix_tree_preload_end();
Chris Masoneb14ab82011-02-10 12:35:00 -05003371
3372 /*
3373 * there is a race where release page may have
3374 * tried to find this extent buffer in the radix
3375 * but failed. It will tell the VM it is safe to
3376 * reclaim the, and it will clear the page private bit.
3377 * We must make sure to set the page private bit properly
3378 * after the extent buffer is in the radix tree so
3379 * it doesn't get lost
3380 */
3381 set_page_extent_mapped(eb->first_page);
3382 set_page_extent_head(eb->first_page, eb->len);
3383 if (!page0)
3384 unlock_page(eb->first_page);
Chris Masond1310b22008-01-24 16:13:08 -05003385 return eb;
3386
Chris Mason6af118ce2008-07-22 11:18:07 -04003387free_eb:
Chris Masoneb14ab82011-02-10 12:35:00 -05003388 if (eb->first_page && !page0)
3389 unlock_page(eb->first_page);
3390
Chris Masond1310b22008-01-24 16:13:08 -05003391 if (!atomic_dec_and_test(&eb->refs))
Chris Mason6af118ce2008-07-22 11:18:07 -04003392 return exists;
Miao Xie897ca6e92010-10-26 20:57:29 -04003393 btrfs_release_extent_buffer(eb);
Chris Mason6af118ce2008-07-22 11:18:07 -04003394 return exists;
Chris Masond1310b22008-01-24 16:13:08 -05003395}
Chris Masond1310b22008-01-24 16:13:08 -05003396
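/*
 * return the extent buffer that is already cached for 'start', taking
 * a reference on it, or NULL if it is not in the radix tree.
 */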
3397struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3398 u64 start, unsigned long len,
3399 gfp_t mask)
3400{
Chris Masond1310b22008-01-24 16:13:08 -05003401 struct extent_buffer *eb;
Chris Masond1310b22008-01-24 16:13:08 -05003402
Miao Xie19fe0a82010-10-26 20:57:29 -04003403 rcu_read_lock();
3404 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3405 if (eb && atomic_inc_not_zero(&eb->refs)) {
3406 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -04003407 mark_page_accessed(eb->first_page);
Miao Xie19fe0a82010-10-26 20:57:29 -04003408 return eb;
3409 }
3410 rcu_read_unlock();
Josef Bacik0f9dd462008-09-23 13:14:11 -04003411
Miao Xie19fe0a82010-10-26 20:57:29 -04003412 return NULL;
Chris Masond1310b22008-01-24 16:13:08 -05003413}
Chris Masond1310b22008-01-24 16:13:08 -05003414
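/*
 * drop a reference on an extent buffer.  The final reference is meant
 * to be dropped through try_release_extent_buffer(), so reaching zero
 * here only triggers a warning.
 */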
3415void free_extent_buffer(struct extent_buffer *eb)
3416{
Chris Masond1310b22008-01-24 16:13:08 -05003417 if (!eb)
3418 return;
3419
3420 if (!atomic_dec_and_test(&eb->refs))
3421 return;
3422
Chris Mason6af118ce2008-07-22 11:18:07 -04003423 WARN_ON(1);
Chris Masond1310b22008-01-24 16:13:08 -05003424}
Chris Masond1310b22008-01-24 16:13:08 -05003425
3426int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3427 struct extent_buffer *eb)
3428{
Chris Masond1310b22008-01-24 16:13:08 -05003429 unsigned long i;
3430 unsigned long num_pages;
3431 struct page *page;
3432
Chris Masond1310b22008-01-24 16:13:08 -05003433 num_pages = num_extent_pages(eb->start, eb->len);
3434
3435 for (i = 0; i < num_pages; i++) {
3436 page = extent_buffer_page(eb, i);
Chris Masonb9473432009-03-13 11:00:37 -04003437 if (!PageDirty(page))
Chris Masond2c3f4f2008-11-19 12:44:22 -05003438 continue;
3439
Chris Masona61e6f22008-07-22 11:18:08 -04003440 lock_page(page);
Chris Masoneb14ab82011-02-10 12:35:00 -05003441 WARN_ON(!PagePrivate(page));
3442
3443 set_page_extent_mapped(page);
Chris Masond1310b22008-01-24 16:13:08 -05003444 if (i == 0)
3445 set_page_extent_head(page, eb->len);
Chris Masond1310b22008-01-24 16:13:08 -05003446
Chris Masond1310b22008-01-24 16:13:08 -05003447 clear_page_dirty_for_io(page);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003448 spin_lock_irq(&page->mapping->tree_lock);
Chris Masond1310b22008-01-24 16:13:08 -05003449 if (!PageDirty(page)) {
3450 radix_tree_tag_clear(&page->mapping->page_tree,
3451 page_index(page),
3452 PAGECACHE_TAG_DIRTY);
3453 }
Sven Wegener0ee0fda2008-07-30 16:54:26 -04003454 spin_unlock_irq(&page->mapping->tree_lock);
Chris Masona61e6f22008-07-22 11:18:08 -04003455 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003456 }
3457 return 0;
3458}
Chris Masond1310b22008-01-24 16:13:08 -05003459
3460int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
3461 struct extent_buffer *eb)
3462{
3463 return wait_on_extent_writeback(tree, eb->start,
3464 eb->start + eb->len - 1);
3465}
Chris Masond1310b22008-01-24 16:13:08 -05003466
3467int set_extent_buffer_dirty(struct extent_io_tree *tree,
3468 struct extent_buffer *eb)
3469{
3470 unsigned long i;
3471 unsigned long num_pages;
Chris Masonb9473432009-03-13 11:00:37 -04003472 int was_dirty = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003473
Chris Masonb9473432009-03-13 11:00:37 -04003474 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
Chris Masond1310b22008-01-24 16:13:08 -05003475 num_pages = num_extent_pages(eb->start, eb->len);
Chris Masonb9473432009-03-13 11:00:37 -04003476 for (i = 0; i < num_pages; i++)
Chris Masond1310b22008-01-24 16:13:08 -05003477 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
Chris Masonb9473432009-03-13 11:00:37 -04003478 return was_dirty;
Chris Masond1310b22008-01-24 16:13:08 -05003479}
Chris Masond1310b22008-01-24 16:13:08 -05003480
Chris Mason1259ab72008-05-12 13:39:03 -04003481int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
Josef Bacik2ac55d42010-02-03 19:33:23 +00003482 struct extent_buffer *eb,
3483 struct extent_state **cached_state)
Chris Mason1259ab72008-05-12 13:39:03 -04003484{
3485 unsigned long i;
3486 struct page *page;
3487 unsigned long num_pages;
3488
3489 num_pages = num_extent_pages(eb->start, eb->len);
Chris Masonb4ce94d2009-02-04 09:25:08 -05003490 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Mason1259ab72008-05-12 13:39:03 -04003491
3492 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
Josef Bacik2ac55d42010-02-03 19:33:23 +00003493 cached_state, GFP_NOFS);
Chris Mason1259ab72008-05-12 13:39:03 -04003494 for (i = 0; i < num_pages; i++) {
3495 page = extent_buffer_page(eb, i);
Chris Mason33958dc2008-07-30 10:29:12 -04003496 if (page)
3497 ClearPageUptodate(page);
Chris Mason1259ab72008-05-12 13:39:03 -04003498 }
3499 return 0;
3500}
3501
Chris Masond1310b22008-01-24 16:13:08 -05003502int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3503 struct extent_buffer *eb)
3504{
3505 unsigned long i;
3506 struct page *page;
3507 unsigned long num_pages;
3508
3509 num_pages = num_extent_pages(eb->start, eb->len);
3510
3511 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00003512 NULL, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05003513 for (i = 0; i < num_pages; i++) {
3514 page = extent_buffer_page(eb, i);
3515 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
3516 ((i == num_pages - 1) &&
3517 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
3518 check_page_uptodate(tree, page);
3519 continue;
3520 }
3521 SetPageUptodate(page);
3522 }
3523 return 0;
3524}
Chris Masond1310b22008-01-24 16:13:08 -05003525
Chris Masonce9adaa2008-04-09 16:28:12 -04003526int extent_range_uptodate(struct extent_io_tree *tree,
3527 u64 start, u64 end)
3528{
3529 struct page *page;
3530 int ret;
3531 int pg_uptodate = 1;
3532 int uptodate;
3533 unsigned long index;
3534
Chris Mason9655d292009-09-02 15:22:30 -04003535 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
Chris Masonce9adaa2008-04-09 16:28:12 -04003536 if (ret)
3537 return 1;
Chris Masond3977122009-01-05 21:25:51 -05003538 while (start <= end) {
Chris Masonce9adaa2008-04-09 16:28:12 -04003539 index = start >> PAGE_CACHE_SHIFT;
3540 page = find_get_page(tree->mapping, index);
3541 uptodate = PageUptodate(page);
3542 page_cache_release(page);
3543 if (!uptodate) {
3544 pg_uptodate = 0;
3545 break;
3546 }
3547 start += PAGE_CACHE_SIZE;
3548 }
3549 return pg_uptodate;
3550}
3551
Chris Masond1310b22008-01-24 16:13:08 -05003552int extent_buffer_uptodate(struct extent_io_tree *tree,
Josef Bacik2ac55d42010-02-03 19:33:23 +00003553 struct extent_buffer *eb,
3554 struct extent_state *cached_state)
Chris Masond1310b22008-01-24 16:13:08 -05003555{
Chris Mason728131d2008-04-09 16:28:12 -04003556 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003557 unsigned long num_pages;
3558 unsigned long i;
Chris Mason728131d2008-04-09 16:28:12 -04003559 struct page *page;
3560 int pg_uptodate = 1;
3561
Chris Masonb4ce94d2009-02-04 09:25:08 -05003562 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
Chris Mason42352982008-04-28 16:40:52 -04003563 return 1;
Chris Mason728131d2008-04-09 16:28:12 -04003564
Chris Mason42352982008-04-28 16:40:52 -04003565 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Josef Bacik2ac55d42010-02-03 19:33:23 +00003566 EXTENT_UPTODATE, 1, cached_state);
Chris Mason42352982008-04-28 16:40:52 -04003567 if (ret)
3568 return ret;
Chris Mason728131d2008-04-09 16:28:12 -04003569
3570 num_pages = num_extent_pages(eb->start, eb->len);
3571 for (i = 0; i < num_pages; i++) {
3572 page = extent_buffer_page(eb, i);
3573 if (!PageUptodate(page)) {
3574 pg_uptodate = 0;
3575 break;
3576 }
3577 }
Chris Mason42352982008-04-28 16:40:52 -04003578 return pg_uptodate;
Chris Masond1310b22008-01-24 16:13:08 -05003579}
Chris Masond1310b22008-01-24 16:13:08 -05003580
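/*
 * read the pages backing an extent buffer from disk.  Pages that are
 * already uptodate are skipped and the rest go through
 * __extent_read_full_page(), sharing a single bio where possible.
 * With 'wait' set, block until the reads finish and return -EIO if any
 * page failed to become uptodate.
 */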
3581int read_extent_buffer_pages(struct extent_io_tree *tree,
3582 struct extent_buffer *eb,
Chris Masona86c12c2008-02-07 10:50:54 -05003583 u64 start, int wait,
Chris Masonf1885912008-04-09 16:28:12 -04003584 get_extent_t *get_extent, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05003585{
3586 unsigned long i;
3587 unsigned long start_i;
3588 struct page *page;
3589 int err;
3590 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003591 int locked_pages = 0;
3592 int all_uptodate = 1;
3593 int inc_all_pages = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003594 unsigned long num_pages;
Chris Masona86c12c2008-02-07 10:50:54 -05003595 struct bio *bio = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04003596 unsigned long bio_flags = 0;
Chris Masona86c12c2008-02-07 10:50:54 -05003597
Chris Masonb4ce94d2009-02-04 09:25:08 -05003598 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
Chris Masond1310b22008-01-24 16:13:08 -05003599 return 0;
3600
Chris Masonce9adaa2008-04-09 16:28:12 -04003601 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Chris Mason9655d292009-09-02 15:22:30 -04003602 EXTENT_UPTODATE, 1, NULL)) {
Chris Masond1310b22008-01-24 16:13:08 -05003603 return 0;
3604 }
3605
3606 if (start) {
3607 WARN_ON(start < eb->start);
3608 start_i = (start >> PAGE_CACHE_SHIFT) -
3609 (eb->start >> PAGE_CACHE_SHIFT);
3610 } else {
3611 start_i = 0;
3612 }
3613
3614 num_pages = num_extent_pages(eb->start, eb->len);
3615 for (i = start_i; i < num_pages; i++) {
3616 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003617 if (!wait) {
David Woodhouse2db04962008-08-07 11:19:43 -04003618 if (!trylock_page(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04003619 goto unlock_exit;
Chris Masond1310b22008-01-24 16:13:08 -05003620 } else {
3621 lock_page(page);
3622 }
Chris Masonce9adaa2008-04-09 16:28:12 -04003623 locked_pages++;
Chris Masond3977122009-01-05 21:25:51 -05003624 if (!PageUptodate(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04003625 all_uptodate = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003626 }
3627 if (all_uptodate) {
3628 if (start_i == 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05003629 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masonce9adaa2008-04-09 16:28:12 -04003630 goto unlock_exit;
3631 }
3632
3633 for (i = start_i; i < num_pages; i++) {
3634 page = extent_buffer_page(eb, i);
Chris Masoneb14ab82011-02-10 12:35:00 -05003635
3636 WARN_ON(!PagePrivate(page));
3637
3638 set_page_extent_mapped(page);
3639 if (i == 0)
3640 set_page_extent_head(page, eb->len);
3641
Chris Masonce9adaa2008-04-09 16:28:12 -04003642 if (inc_all_pages)
3643 page_cache_get(page);
3644 if (!PageUptodate(page)) {
3645 if (start_i == 0)
3646 inc_all_pages = 1;
Chris Masonf1885912008-04-09 16:28:12 -04003647 ClearPageError(page);
Chris Masona86c12c2008-02-07 10:50:54 -05003648 err = __extent_read_full_page(tree, page,
Chris Masonf1885912008-04-09 16:28:12 -04003649 get_extent, &bio,
Chris Masonc8b97812008-10-29 14:49:59 -04003650 mirror_num, &bio_flags);
Chris Masond3977122009-01-05 21:25:51 -05003651 if (err)
Chris Masond1310b22008-01-24 16:13:08 -05003652 ret = err;
Chris Masond1310b22008-01-24 16:13:08 -05003653 } else {
3654 unlock_page(page);
3655 }
3656 }
3657
Chris Masona86c12c2008-02-07 10:50:54 -05003658 if (bio)
Chris Masonc8b97812008-10-29 14:49:59 -04003659 submit_one_bio(READ, bio, mirror_num, bio_flags);
Chris Masona86c12c2008-02-07 10:50:54 -05003660
Chris Masond3977122009-01-05 21:25:51 -05003661 if (ret || !wait)
Chris Masond1310b22008-01-24 16:13:08 -05003662 return ret;
Chris Masond3977122009-01-05 21:25:51 -05003663
Chris Masond1310b22008-01-24 16:13:08 -05003664 for (i = start_i; i < num_pages; i++) {
3665 page = extent_buffer_page(eb, i);
3666 wait_on_page_locked(page);
Chris Masond3977122009-01-05 21:25:51 -05003667 if (!PageUptodate(page))
Chris Masond1310b22008-01-24 16:13:08 -05003668 ret = -EIO;
Chris Masond1310b22008-01-24 16:13:08 -05003669 }
Chris Masond3977122009-01-05 21:25:51 -05003670
Chris Masond1310b22008-01-24 16:13:08 -05003671 if (!ret)
Chris Masonb4ce94d2009-02-04 09:25:08 -05003672 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masond1310b22008-01-24 16:13:08 -05003673 return ret;
Chris Masonce9adaa2008-04-09 16:28:12 -04003674
3675unlock_exit:
3676 i = start_i;
Chris Masond3977122009-01-05 21:25:51 -05003677 while (locked_pages > 0) {
Chris Masonce9adaa2008-04-09 16:28:12 -04003678 page = extent_buffer_page(eb, i);
3679 i++;
3680 unlock_page(page);
3681 locked_pages--;
3682 }
3683 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05003684}
Chris Masond1310b22008-01-24 16:13:08 -05003685
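/*
 * copy 'len' bytes starting at 'start' out of the extent buffer into
 * 'dstv', mapping one page at a time.
 */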
3686void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3687 unsigned long start,
3688 unsigned long len)
3689{
3690 size_t cur;
3691 size_t offset;
3692 struct page *page;
3693 char *kaddr;
3694 char *dst = (char *)dstv;
3695 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3696 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05003697
3698 WARN_ON(start > eb->len);
3699 WARN_ON(start + len > eb->start + eb->len);
3700
3701 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3702
Chris Masond3977122009-01-05 21:25:51 -05003703 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003704 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003705
3706 cur = min(len, (PAGE_CACHE_SIZE - offset));
3707 kaddr = kmap_atomic(page, KM_USER1);
3708 memcpy(dst, kaddr + offset, cur);
3709 kunmap_atomic(kaddr, KM_USER1);
3710
3711 dst += cur;
3712 len -= cur;
3713 offset = 0;
3714 i++;
3715 }
3716}
Chris Masond1310b22008-01-24 16:13:08 -05003717
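/*
 * map a chunk of an extent buffer with kmap_atomic().  The requested
 * area must not cross a page boundary and must not run past the end of
 * the buffer; -EINVAL is returned if it does.
 */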
3718int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3719 unsigned long min_len, char **token, char **map,
3720 unsigned long *map_start,
3721 unsigned long *map_len, int km)
3722{
3723 size_t offset = start & (PAGE_CACHE_SIZE - 1);
3724 char *kaddr;
3725 struct page *p;
3726 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3727 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3728 unsigned long end_i = (start_offset + start + min_len - 1) >>
3729 PAGE_CACHE_SHIFT;
3730
3731 if (i != end_i)
3732 return -EINVAL;
3733
3734 if (i == 0) {
3735 offset = start_offset;
3736 *map_start = 0;
3737 } else {
3738 offset = 0;
3739 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
3740 }
Chris Masond3977122009-01-05 21:25:51 -05003741
Chris Masond1310b22008-01-24 16:13:08 -05003742 if (start + min_len > eb->len) {
Chris Masond3977122009-01-05 21:25:51 -05003743 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
3744 "wanted %lu %lu\n", (unsigned long long)eb->start,
3745 eb->len, start, min_len);
Chris Masond1310b22008-01-24 16:13:08 -05003746 WARN_ON(1);
Josef Bacik850265332011-03-15 14:52:12 -04003747 return -EINVAL;
Chris Masond1310b22008-01-24 16:13:08 -05003748 }
3749
3750 p = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003751 kaddr = kmap_atomic(p, km);
3752 *token = kaddr;
3753 *map = kaddr + offset;
3754 *map_len = PAGE_CACHE_SIZE - offset;
3755 return 0;
3756}
Chris Masond1310b22008-01-24 16:13:08 -05003757
3758int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3759 unsigned long min_len,
3760 char **token, char **map,
3761 unsigned long *map_start,
3762 unsigned long *map_len, int km)
3763{
3764 int err;
3765 int save = 0;
3766 if (eb->map_token) {
3767 unmap_extent_buffer(eb, eb->map_token, km);
3768 eb->map_token = NULL;
3769 save = 1;
3770 }
3771 err = map_private_extent_buffer(eb, start, min_len, token, map,
3772 map_start, map_len, km);
3773 if (!err && save) {
3774 eb->map_token = *token;
3775 eb->kaddr = *map;
3776 eb->map_start = *map_start;
3777 eb->map_len = *map_len;
3778 }
3779 return err;
3780}
Chris Masond1310b22008-01-24 16:13:08 -05003781
3782void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3783{
3784 kunmap_atomic(token, km);
3785}
Chris Masond1310b22008-01-24 16:13:08 -05003786
3787int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3788 unsigned long start,
3789 unsigned long len)
3790{
3791 size_t cur;
3792 size_t offset;
3793 struct page *page;
3794 char *kaddr;
3795 char *ptr = (char *)ptrv;
3796 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3797 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3798 int ret = 0;
3799
3800 WARN_ON(start > eb->len);
3801 WARN_ON(start + len > eb->start + eb->len);
3802
3803 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3804
Chris Masond3977122009-01-05 21:25:51 -05003805 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003806 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003807
3808 cur = min(len, (PAGE_CACHE_SIZE - offset));
3809
3810 kaddr = kmap_atomic(page, KM_USER0);
3811 ret = memcmp(ptr, kaddr + offset, cur);
3812 kunmap_atomic(kaddr, KM_USER0);
3813 if (ret)
3814 break;
3815
3816 ptr += cur;
3817 len -= cur;
3818 offset = 0;
3819 i++;
3820 }
3821 return ret;
3822}
Chris Masond1310b22008-01-24 16:13:08 -05003823
3824void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3825 unsigned long start, unsigned long len)
3826{
3827 size_t cur;
3828 size_t offset;
3829 struct page *page;
3830 char *kaddr;
3831 char *src = (char *)srcv;
3832 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3833 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3834
3835 WARN_ON(start > eb->len);
3836 WARN_ON(start + len > eb->start + eb->len);
3837
3838 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3839
Chris Masond3977122009-01-05 21:25:51 -05003840 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003841 page = extent_buffer_page(eb, i);
3842 WARN_ON(!PageUptodate(page));
3843
3844 cur = min(len, PAGE_CACHE_SIZE - offset);
3845 kaddr = kmap_atomic(page, KM_USER1);
3846 memcpy(kaddr + offset, src, cur);
3847 kunmap_atomic(kaddr, KM_USER1);
3848
3849 src += cur;
3850 len -= cur;
3851 offset = 0;
3852 i++;
3853 }
3854}
Chris Masond1310b22008-01-24 16:13:08 -05003855
3856void memset_extent_buffer(struct extent_buffer *eb, char c,
3857 unsigned long start, unsigned long len)
3858{
3859 size_t cur;
3860 size_t offset;
3861 struct page *page;
3862 char *kaddr;
3863 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3864 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3865
3866 WARN_ON(start > eb->len);
3867 WARN_ON(start + len > eb->start + eb->len);
3868
3869 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3870
Chris Masond3977122009-01-05 21:25:51 -05003871 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003872 page = extent_buffer_page(eb, i);
3873 WARN_ON(!PageUptodate(page));
3874
3875 cur = min(len, PAGE_CACHE_SIZE - offset);
3876 kaddr = kmap_atomic(page, KM_USER0);
3877 memset(kaddr + offset, c, cur);
3878 kunmap_atomic(kaddr, KM_USER0);
3879
3880 len -= cur;
3881 offset = 0;
3882 i++;
3883 }
3884}
Chris Masond1310b22008-01-24 16:13:08 -05003885
3886void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3887 unsigned long dst_offset, unsigned long src_offset,
3888 unsigned long len)
3889{
3890 u64 dst_len = dst->len;
3891 size_t cur;
3892 size_t offset;
3893 struct page *page;
3894 char *kaddr;
3895 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3896 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3897
3898 WARN_ON(src->len != dst_len);
3899
3900 offset = (start_offset + dst_offset) &
3901 ((unsigned long)PAGE_CACHE_SIZE - 1);
3902
Chris Masond3977122009-01-05 21:25:51 -05003903 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003904 page = extent_buffer_page(dst, i);
3905 WARN_ON(!PageUptodate(page));
3906
3907 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3908
3909 kaddr = kmap_atomic(page, KM_USER0);
3910 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3911 kunmap_atomic(kaddr, KM_USER0);
3912
3913 src_offset += cur;
3914 len -= cur;
3915 offset = 0;
3916 i++;
3917 }
3918}
Chris Masond1310b22008-01-24 16:13:08 -05003919
3920static void move_pages(struct page *dst_page, struct page *src_page,
3921 unsigned long dst_off, unsigned long src_off,
3922 unsigned long len)
3923{
3924 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3925 if (dst_page == src_page) {
3926 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3927 } else {
3928 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
3929 char *p = dst_kaddr + dst_off + len;
3930 char *s = src_kaddr + src_off + len;
3931
3932 while (len--)
3933 *--p = *--s;
3934
3935 kunmap_atomic(src_kaddr, KM_USER1);
3936 }
3937 kunmap_atomic(dst_kaddr, KM_USER0);
3938}
3939
Sergei Trofimovich33872062011-04-11 21:52:52 +00003940static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
3941{
3942 unsigned long distance = (src > dst) ? src - dst : dst - src;
3943 return distance < len;
3944}
3945
Chris Masond1310b22008-01-24 16:13:08 -05003946static void copy_pages(struct page *dst_page, struct page *src_page,
3947 unsigned long dst_off, unsigned long src_off,
3948 unsigned long len)
3949{
3950 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3951 char *src_kaddr;
3952
Sergei Trofimovich33872062011-04-11 21:52:52 +00003953 if (dst_page != src_page) {
Chris Masond1310b22008-01-24 16:13:08 -05003954 src_kaddr = kmap_atomic(src_page, KM_USER1);
Sergei Trofimovich33872062011-04-11 21:52:52 +00003955 } else {
Chris Masond1310b22008-01-24 16:13:08 -05003956 src_kaddr = dst_kaddr;
Sergei Trofimovich33872062011-04-11 21:52:52 +00003957 BUG_ON(areas_overlap(src_off, dst_off, len));
3958 }
Chris Masond1310b22008-01-24 16:13:08 -05003959
3960 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3961 kunmap_atomic(dst_kaddr, KM_USER0);
3962 if (dst_page != src_page)
3963 kunmap_atomic(src_kaddr, KM_USER1);
3964}
3965
3966void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3967 unsigned long src_offset, unsigned long len)
3968{
3969 size_t cur;
3970 size_t dst_off_in_page;
3971 size_t src_off_in_page;
3972 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3973 unsigned long dst_i;
3974 unsigned long src_i;
3975
3976 if (src_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05003977		printk(KERN_ERR "btrfs memcpy bogus src_offset %lu move "
3978 "len %lu dst len %lu\n", src_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05003979 BUG_ON(1);
3980 }
3981 if (dst_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05003982		printk(KERN_ERR "btrfs memcpy bogus dst_offset %lu move "
3983 "len %lu dst len %lu\n", dst_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05003984 BUG_ON(1);
3985 }
3986
Chris Masond3977122009-01-05 21:25:51 -05003987 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05003988 dst_off_in_page = (start_offset + dst_offset) &
3989 ((unsigned long)PAGE_CACHE_SIZE - 1);
3990 src_off_in_page = (start_offset + src_offset) &
3991 ((unsigned long)PAGE_CACHE_SIZE - 1);
3992
3993 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3994 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
3995
3996 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3997 src_off_in_page));
3998 cur = min_t(unsigned long, cur,
3999 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
4000
4001 copy_pages(extent_buffer_page(dst, dst_i),
4002 extent_buffer_page(dst, src_i),
4003 dst_off_in_page, src_off_in_page, cur);
4004
4005 src_offset += cur;
4006 dst_offset += cur;
4007 len -= cur;
4008 }
4009}
Chris Masond1310b22008-01-24 16:13:08 -05004010
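/*
 * memmove within an extent buffer: overlapping ranges are copied
 * backwards page by page, non-overlapping ranges are handed to
 * memcpy_extent_buffer().
 */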
4011void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4012 unsigned long src_offset, unsigned long len)
4013{
4014 size_t cur;
4015 size_t dst_off_in_page;
4016 size_t src_off_in_page;
4017 unsigned long dst_end = dst_offset + len - 1;
4018 unsigned long src_end = src_offset + len - 1;
4019 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
4020 unsigned long dst_i;
4021 unsigned long src_i;
4022
4023 if (src_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05004024 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
4025 "len %lu len %lu\n", src_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05004026 BUG_ON(1);
4027 }
4028 if (dst_offset + len > dst->len) {
Chris Masond3977122009-01-05 21:25:51 -05004029 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
4030 "len %lu len %lu\n", dst_offset, len, dst->len);
Chris Masond1310b22008-01-24 16:13:08 -05004031 BUG_ON(1);
4032 }
Sergei Trofimovich33872062011-04-11 21:52:52 +00004033 if (!areas_overlap(src_offset, dst_offset, len)) {
Chris Masond1310b22008-01-24 16:13:08 -05004034 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
4035 return;
4036 }
Chris Masond3977122009-01-05 21:25:51 -05004037 while (len > 0) {
Chris Masond1310b22008-01-24 16:13:08 -05004038 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
4039 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
4040
4041 dst_off_in_page = (start_offset + dst_end) &
4042 ((unsigned long)PAGE_CACHE_SIZE - 1);
4043 src_off_in_page = (start_offset + src_end) &
4044 ((unsigned long)PAGE_CACHE_SIZE - 1);
4045
4046 cur = min_t(unsigned long, len, src_off_in_page + 1);
4047 cur = min(cur, dst_off_in_page + 1);
4048 move_pages(extent_buffer_page(dst, dst_i),
4049 extent_buffer_page(dst, src_i),
4050 dst_off_in_page - cur + 1,
4051 src_off_in_page - cur + 1, cur);
4052
4053 dst_end -= cur;
4054 src_end -= cur;
4055 len -= cur;
4056 }
4057}
Chris Mason6af118ce2008-07-22 11:18:07 -04004058
Miao Xie19fe0a82010-10-26 20:57:29 -04004059static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4060{
4061 struct extent_buffer *eb =
4062 container_of(head, struct extent_buffer, rcu_head);
4063
4064 btrfs_release_extent_buffer(eb);
4065}
4066
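/*
 * releasepage hook for extent buffer pages: if the buffer covering this
 * page is not dirty and we hold the only reference, drop it from the
 * radix tree and free it via RCU.  Returns 1 when the page may be
 * released.
 */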
Chris Mason6af118ce2008-07-22 11:18:07 -04004067int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
4068{
4069 u64 start = page_offset(page);
4070 struct extent_buffer *eb;
4071 int ret = 1;
Chris Mason6af118ce2008-07-22 11:18:07 -04004072
4073 spin_lock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04004074 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
Chris Mason45f49bc2010-11-21 22:27:44 -05004075 if (!eb) {
4076 spin_unlock(&tree->buffer_lock);
4077 return ret;
4078 }
Chris Mason6af118ce2008-07-22 11:18:07 -04004079
Chris Masonb9473432009-03-13 11:00:37 -04004080 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
4081 ret = 0;
4082 goto out;
4083 }
Miao Xie897ca6e92010-10-26 20:57:29 -04004084
Miao Xie19fe0a82010-10-26 20:57:29 -04004085 /*
4086 * set @eb->refs to 0 if it is already 1, and then release the @eb.
4087	 * Otherwise someone else still holds a reference and we back off.
4088 */
4089 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
4090 ret = 0;
4091 goto out;
4092 }
4093
4094 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
Chris Mason6af118ce2008-07-22 11:18:07 -04004095out:
4096 spin_unlock(&tree->buffer_lock);
Miao Xie19fe0a82010-10-26 20:57:29 -04004097
4098 /* at this point we can safely release the extent buffer */
4099 if (atomic_read(&eb->refs) == 0)
4100 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
Chris Mason6af118ce2008-07-22 11:18:07 -04004101 return ret;
4102}