blob: 33d55734c514f9af8b4435e45ae368ee7d9614d5 [file] [log] [blame]
Mark Fashehccd979b2005-12-15 14:31:24 -08001/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * suballoc.c
5 *
6 * metadata alloc and free
7 * Inspired by ext3 block groups.
8 *
9 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 021110-1307, USA.
25 */
26
27#include <linux/fs.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h>
31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h>
34
35#include "ocfs2.h"
36
37#include "alloc.h"
38#include "dlmglue.h"
39#include "inode.h"
40#include "journal.h"
41#include "localalloc.h"
42#include "suballoc.h"
43#include "super.h"
44#include "sysfile.h"
45#include "uptodate.h"
46
47#include "buffer_head_io.h"
48
Tao Maffda89a2008-03-03 17:12:09 +080049#define NOT_ALLOC_NEW_GROUP 0
50#define ALLOC_NEW_GROUP 1
51
Mark Fashehccd979b2005-12-15 14:31:24 -080052static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
53static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
54static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
Mark Fasheh1fabe142006-10-09 18:11:45 -070055static int ocfs2_block_group_fill(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -080056 struct inode *alloc_inode,
57 struct buffer_head *bg_bh,
58 u64 group_blkno,
59 u16 my_chain,
60 struct ocfs2_chain_list *cl);
61static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
62 struct inode *alloc_inode,
63 struct buffer_head *bh);
64
Mark Fashehccd979b2005-12-15 14:31:24 -080065static int ocfs2_cluster_group_search(struct inode *inode,
66 struct buffer_head *group_bh,
67 u32 bits_wanted, u32 min_bits,
68 u16 *bit_off, u16 *bits_found);
69static int ocfs2_block_group_search(struct inode *inode,
70 struct buffer_head *group_bh,
71 u32 bits_wanted, u32 min_bits,
72 u16 *bit_off, u16 *bits_found);
Mark Fashehccd979b2005-12-15 14:31:24 -080073static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
74 struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -070075 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -080076 u32 bits_wanted,
77 u32 min_bits,
78 u16 *bit_off,
79 unsigned int *num_bits,
80 u64 *bg_blkno);
81static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
82 int nr);
Mark Fasheh1fabe142006-10-09 18:11:45 -070083static inline int ocfs2_block_group_set_bits(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -080084 struct inode *alloc_inode,
85 struct ocfs2_group_desc *bg,
86 struct buffer_head *group_bh,
87 unsigned int bit_off,
88 unsigned int num_bits);
Mark Fasheh1fabe142006-10-09 18:11:45 -070089static inline int ocfs2_block_group_clear_bits(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -080090 struct inode *alloc_inode,
91 struct ocfs2_group_desc *bg,
92 struct buffer_head *group_bh,
93 unsigned int bit_off,
94 unsigned int num_bits);
95
Mark Fasheh1fabe142006-10-09 18:11:45 -070096static int ocfs2_relink_block_group(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -080097 struct inode *alloc_inode,
98 struct buffer_head *fe_bh,
99 struct buffer_head *bg_bh,
100 struct buffer_head *prev_bg_bh,
101 u16 chain);
102static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
103 u32 wanted);
Mark Fashehccd979b2005-12-15 14:31:24 -0800104static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
105 u64 bg_blkno,
106 u16 bg_bit_off);
Mark Fashehccd979b2005-12-15 14:31:24 -0800107static inline void ocfs2_block_to_cluster_group(struct inode *inode,
108 u64 data_blkno,
109 u64 *bg_blkno,
110 u16 *bg_bit_off);
111
112void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
113{
Mark Fashehda5cbf22006-10-06 18:34:35 -0700114 struct inode *inode = ac->ac_inode;
115
116 if (inode) {
117 if (ac->ac_which != OCFS2_AC_USE_LOCAL)
Mark Fashehe63aecb62007-10-18 15:30:42 -0700118 ocfs2_inode_unlock(inode, 1);
Mark Fashehda5cbf22006-10-06 18:34:35 -0700119
120 mutex_unlock(&inode->i_mutex);
121
122 iput(inode);
123 }
Mark Fashehccd979b2005-12-15 14:31:24 -0800124 if (ac->ac_bh)
125 brelse(ac->ac_bh);
126 kfree(ac);
127}
128
129static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
130{
131 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
132}
133
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700134/* somewhat more expensive than our other checks, so use sparingly. */
Tao Mad6590722007-12-18 15:47:03 +0800135int ocfs2_check_group_descriptor(struct super_block *sb,
136 struct ocfs2_dinode *di,
137 struct ocfs2_group_desc *gd)
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700138{
139 unsigned int max_bits;
140
141 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
142 OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
143 return -EIO;
144 }
145
146 if (di->i_blkno != gd->bg_parent_dinode) {
147 ocfs2_error(sb, "Group descriptor # %llu has bad parent "
148 "pointer (%llu, expected %llu)",
149 (unsigned long long)le64_to_cpu(gd->bg_blkno),
150 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
151 (unsigned long long)le64_to_cpu(di->i_blkno));
152 return -EIO;
153 }
154
155 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
156 if (le16_to_cpu(gd->bg_bits) > max_bits) {
157 ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
158 (unsigned long long)le64_to_cpu(gd->bg_blkno),
159 le16_to_cpu(gd->bg_bits));
160 return -EIO;
161 }
162
163 if (le16_to_cpu(gd->bg_chain) >=
164 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
165 ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
166 (unsigned long long)le64_to_cpu(gd->bg_blkno),
167 le16_to_cpu(gd->bg_chain));
168 return -EIO;
169 }
170
171 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
172 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
173 "claims that %u are free",
174 (unsigned long long)le64_to_cpu(gd->bg_blkno),
175 le16_to_cpu(gd->bg_bits),
176 le16_to_cpu(gd->bg_free_bits_count));
177 return -EIO;
178 }
179
180 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
181 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
182 "max bitmap bits of %u",
183 (unsigned long long)le64_to_cpu(gd->bg_blkno),
184 le16_to_cpu(gd->bg_bits),
185 8 * le16_to_cpu(gd->bg_size));
186 return -EIO;
187 }
188
189 return 0;
190}
191
Mark Fasheh1fabe142006-10-09 18:11:45 -0700192static int ocfs2_block_group_fill(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -0800193 struct inode *alloc_inode,
194 struct buffer_head *bg_bh,
195 u64 group_blkno,
196 u16 my_chain,
197 struct ocfs2_chain_list *cl)
198{
199 int status = 0;
200 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
201 struct super_block * sb = alloc_inode->i_sb;
202
203 mlog_entry_void();
204
205 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
Mark Fashehb06970532006-03-03 10:24:33 -0800206 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
207 "b_blocknr (%llu)",
208 (unsigned long long)group_blkno,
Mark Fashehccd979b2005-12-15 14:31:24 -0800209 (unsigned long long) bg_bh->b_blocknr);
210 status = -EIO;
211 goto bail;
212 }
213
214 status = ocfs2_journal_access(handle,
215 alloc_inode,
216 bg_bh,
217 OCFS2_JOURNAL_ACCESS_CREATE);
218 if (status < 0) {
219 mlog_errno(status);
220 goto bail;
221 }
222
223 memset(bg, 0, sb->s_blocksize);
224 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
225 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
226 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb));
227 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
228 bg->bg_chain = cpu_to_le16(my_chain);
229 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
230 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
231 bg->bg_blkno = cpu_to_le64(group_blkno);
232 /* set the 1st bit in the bitmap to account for the descriptor block */
233 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
234 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
235
236 status = ocfs2_journal_dirty(handle, bg_bh);
237 if (status < 0)
238 mlog_errno(status);
239
240 /* There is no need to zero out or otherwise initialize the
241 * other blocks in a group - All valid FS metadata in a block
242 * group stores the superblock fs_generation value at
243 * allocation time. */
244
245bail:
246 mlog_exit(status);
247 return status;
248}
249
250static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
251{
252 u16 curr, best;
253
254 best = curr = 0;
255 while (curr < le16_to_cpu(cl->cl_count)) {
256 if (le32_to_cpu(cl->cl_recs[best].c_total) >
257 le32_to_cpu(cl->cl_recs[curr].c_total))
258 best = curr;
259 curr++;
260 }
261 return best;
262}
263
264/*
265 * We expect the block group allocator to already be locked.
266 */
267static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
268 struct inode *alloc_inode,
269 struct buffer_head *bh)
270{
271 int status, credits;
272 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
273 struct ocfs2_chain_list *cl;
274 struct ocfs2_alloc_context *ac = NULL;
Mark Fasheh1fabe142006-10-09 18:11:45 -0700275 handle_t *handle = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -0800276 u32 bit_off, num_bits;
277 u16 alloc_rec;
278 u64 bg_blkno;
279 struct buffer_head *bg_bh = NULL;
280 struct ocfs2_group_desc *bg;
281
282 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
283
284 mlog_entry_void();
285
Mark Fashehccd979b2005-12-15 14:31:24 -0800286 cl = &fe->id2.i_chain;
287 status = ocfs2_reserve_clusters(osb,
Mark Fashehccd979b2005-12-15 14:31:24 -0800288 le16_to_cpu(cl->cl_cpg),
289 &ac);
290 if (status < 0) {
291 if (status != -ENOSPC)
292 mlog_errno(status);
293 goto bail;
294 }
295
296 credits = ocfs2_calc_group_alloc_credits(osb->sb,
297 le16_to_cpu(cl->cl_cpg));
Mark Fasheh65eff9c2006-10-09 17:26:22 -0700298 handle = ocfs2_start_trans(osb, credits);
Mark Fashehccd979b2005-12-15 14:31:24 -0800299 if (IS_ERR(handle)) {
300 status = PTR_ERR(handle);
301 handle = NULL;
302 mlog_errno(status);
303 goto bail;
304 }
305
306 status = ocfs2_claim_clusters(osb,
307 handle,
308 ac,
309 le16_to_cpu(cl->cl_cpg),
310 &bit_off,
311 &num_bits);
312 if (status < 0) {
313 if (status != -ENOSPC)
314 mlog_errno(status);
315 goto bail;
316 }
317
318 alloc_rec = ocfs2_find_smallest_chain(cl);
319
320 /* setup the group */
321 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
Mark Fashehb06970532006-03-03 10:24:33 -0800322 mlog(0, "new descriptor, record %u, at block %llu\n",
323 alloc_rec, (unsigned long long)bg_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -0800324
325 bg_bh = sb_getblk(osb->sb, bg_blkno);
326 if (!bg_bh) {
327 status = -EIO;
328 mlog_errno(status);
329 goto bail;
330 }
331 ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);
332
333 status = ocfs2_block_group_fill(handle,
334 alloc_inode,
335 bg_bh,
336 bg_blkno,
337 alloc_rec,
338 cl);
339 if (status < 0) {
340 mlog_errno(status);
341 goto bail;
342 }
343
344 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
345
346 status = ocfs2_journal_access(handle, alloc_inode,
347 bh, OCFS2_JOURNAL_ACCESS_WRITE);
348 if (status < 0) {
349 mlog_errno(status);
350 goto bail;
351 }
352
353 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
354 le16_to_cpu(bg->bg_free_bits_count));
355 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits));
356 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno);
357 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
358 le16_add_cpu(&cl->cl_next_free_rec, 1);
359
360 le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
361 le16_to_cpu(bg->bg_free_bits_count));
362 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
363 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
364
365 status = ocfs2_journal_dirty(handle, bh);
366 if (status < 0) {
367 mlog_errno(status);
368 goto bail;
369 }
370
371 spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
372 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
373 fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
374 le32_to_cpu(fe->i_clusters)));
375 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
376 i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
Mark Fasheh8110b072007-03-22 16:53:23 -0700377 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
Mark Fashehccd979b2005-12-15 14:31:24 -0800378
379 status = 0;
380bail:
381 if (handle)
Mark Fasheh02dc1af2006-10-09 16:48:10 -0700382 ocfs2_commit_trans(osb, handle);
Mark Fashehccd979b2005-12-15 14:31:24 -0800383
384 if (ac)
385 ocfs2_free_alloc_context(ac);
386
387 if (bg_bh)
388 brelse(bg_bh);
389
390 mlog_exit(status);
391 return status;
392}
393
394static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
Mark Fashehda5cbf22006-10-06 18:34:35 -0700395 struct ocfs2_alloc_context *ac,
396 int type,
Tao Maffda89a2008-03-03 17:12:09 +0800397 u32 slot,
398 int alloc_new_group)
Mark Fashehccd979b2005-12-15 14:31:24 -0800399{
400 int status;
401 u32 bits_wanted = ac->ac_bits_wanted;
Mark Fashehda5cbf22006-10-06 18:34:35 -0700402 struct inode *alloc_inode;
Mark Fashehccd979b2005-12-15 14:31:24 -0800403 struct buffer_head *bh = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -0800404 struct ocfs2_dinode *fe;
405 u32 free_bits;
406
407 mlog_entry_void();
408
Mark Fashehda5cbf22006-10-06 18:34:35 -0700409 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
410 if (!alloc_inode) {
411 mlog_errno(-EINVAL);
412 return -EINVAL;
Mark Fashehccd979b2005-12-15 14:31:24 -0800413 }
414
Mark Fashehda5cbf22006-10-06 18:34:35 -0700415 mutex_lock(&alloc_inode->i_mutex);
416
Mark Fashehe63aecb62007-10-18 15:30:42 -0700417 status = ocfs2_inode_lock(alloc_inode, &bh, 1);
Mark Fashehda5cbf22006-10-06 18:34:35 -0700418 if (status < 0) {
419 mutex_unlock(&alloc_inode->i_mutex);
420 iput(alloc_inode);
421
422 mlog_errno(status);
423 return status;
424 }
425
426 ac->ac_inode = alloc_inode;
Tao Maa4a48912008-03-03 17:12:30 +0800427 ac->ac_alloc_slot = slot;
Mark Fashehda5cbf22006-10-06 18:34:35 -0700428
Mark Fashehccd979b2005-12-15 14:31:24 -0800429 fe = (struct ocfs2_dinode *) bh->b_data;
430 if (!OCFS2_IS_VALID_DINODE(fe)) {
431 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
432 status = -EIO;
433 goto bail;
434 }
435 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
Mark Fashehb06970532006-03-03 10:24:33 -0800436 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
437 (unsigned long long)le64_to_cpu(fe->i_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -0800438 status = -EIO;
439 goto bail;
440 }
441
442 free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
443 le32_to_cpu(fe->id1.bitmap1.i_used);
444
445 if (bits_wanted > free_bits) {
446 /* cluster bitmap never grows */
447 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
448 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
449 bits_wanted, free_bits);
450 status = -ENOSPC;
451 goto bail;
452 }
453
Tao Maffda89a2008-03-03 17:12:09 +0800454 if (alloc_new_group != ALLOC_NEW_GROUP) {
455 mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
456 "and we don't alloc a new group for it.\n",
457 slot, bits_wanted, free_bits);
458 status = -ENOSPC;
459 goto bail;
460 }
461
Mark Fashehccd979b2005-12-15 14:31:24 -0800462 status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
463 if (status < 0) {
464 if (status != -ENOSPC)
465 mlog_errno(status);
466 goto bail;
467 }
468 atomic_inc(&osb->alloc_stats.bg_extends);
469
470 /* You should never ask for this much metadata */
471 BUG_ON(bits_wanted >
472 (le32_to_cpu(fe->id1.bitmap1.i_total)
473 - le32_to_cpu(fe->id1.bitmap1.i_used)));
474 }
475
476 get_bh(bh);
477 ac->ac_bh = bh;
478bail:
479 if (bh)
480 brelse(bh);
481
482 mlog_exit(status);
483 return status;
484}
485
486int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
Mark Fashehccd979b2005-12-15 14:31:24 -0800487 struct ocfs2_dinode *fe,
488 struct ocfs2_alloc_context **ac)
489{
490 int status;
Mark Fashehda5cbf22006-10-06 18:34:35 -0700491 u32 slot;
Mark Fashehccd979b2005-12-15 14:31:24 -0800492
Robert P. J. Daycd861282006-12-13 00:34:52 -0800493 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
Mark Fashehccd979b2005-12-15 14:31:24 -0800494 if (!(*ac)) {
495 status = -ENOMEM;
496 mlog_errno(status);
497 goto bail;
498 }
499
500 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
Mark Fashehccd979b2005-12-15 14:31:24 -0800501 (*ac)->ac_which = OCFS2_AC_USE_META;
Mark Fashehda5cbf22006-10-06 18:34:35 -0700502 slot = osb->slot_num;
Mark Fashehccd979b2005-12-15 14:31:24 -0800503 (*ac)->ac_group_search = ocfs2_block_group_search;
504
Mark Fashehda5cbf22006-10-06 18:34:35 -0700505 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
Tao Maffda89a2008-03-03 17:12:09 +0800506 EXTENT_ALLOC_SYSTEM_INODE,
507 slot, ALLOC_NEW_GROUP);
Mark Fashehccd979b2005-12-15 14:31:24 -0800508 if (status < 0) {
509 if (status != -ENOSPC)
510 mlog_errno(status);
511 goto bail;
512 }
513
514 status = 0;
515bail:
516 if ((status < 0) && *ac) {
517 ocfs2_free_alloc_context(*ac);
518 *ac = NULL;
519 }
520
Mark Fashehccd979b2005-12-15 14:31:24 -0800521 mlog_exit(status);
522 return status;
523}
524
525int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
Mark Fashehccd979b2005-12-15 14:31:24 -0800526 struct ocfs2_alloc_context **ac)
527{
528 int status;
Mark Fashehccd979b2005-12-15 14:31:24 -0800529
Robert P. J. Daycd861282006-12-13 00:34:52 -0800530 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
Mark Fashehccd979b2005-12-15 14:31:24 -0800531 if (!(*ac)) {
532 status = -ENOMEM;
533 mlog_errno(status);
534 goto bail;
535 }
536
537 (*ac)->ac_bits_wanted = 1;
Mark Fashehccd979b2005-12-15 14:31:24 -0800538 (*ac)->ac_which = OCFS2_AC_USE_INODE;
539
Mark Fashehccd979b2005-12-15 14:31:24 -0800540 (*ac)->ac_group_search = ocfs2_block_group_search;
541
Mark Fashehda5cbf22006-10-06 18:34:35 -0700542 status = ocfs2_reserve_suballoc_bits(osb, *ac,
543 INODE_ALLOC_SYSTEM_INODE,
Tao Maffda89a2008-03-03 17:12:09 +0800544 osb->slot_num, ALLOC_NEW_GROUP);
Mark Fashehccd979b2005-12-15 14:31:24 -0800545 if (status < 0) {
546 if (status != -ENOSPC)
547 mlog_errno(status);
548 goto bail;
549 }
550
551 status = 0;
552bail:
553 if ((status < 0) && *ac) {
554 ocfs2_free_alloc_context(*ac);
555 *ac = NULL;
556 }
557
Mark Fashehccd979b2005-12-15 14:31:24 -0800558 mlog_exit(status);
559 return status;
560}
561
562/* local alloc code has to do the same thing, so rather than do this
563 * twice.. */
564int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
565 struct ocfs2_alloc_context *ac)
566{
567 int status;
568
Mark Fashehccd979b2005-12-15 14:31:24 -0800569 ac->ac_which = OCFS2_AC_USE_MAIN;
570 ac->ac_group_search = ocfs2_cluster_group_search;
571
Mark Fashehda5cbf22006-10-06 18:34:35 -0700572 status = ocfs2_reserve_suballoc_bits(osb, ac,
573 GLOBAL_BITMAP_SYSTEM_INODE,
Tao Maffda89a2008-03-03 17:12:09 +0800574 OCFS2_INVALID_SLOT,
575 ALLOC_NEW_GROUP);
Mark Fashehda5cbf22006-10-06 18:34:35 -0700576 if (status < 0 && status != -ENOSPC) {
Mark Fashehccd979b2005-12-15 14:31:24 -0800577 mlog_errno(status);
Mark Fashehda5cbf22006-10-06 18:34:35 -0700578 goto bail;
579 }
580
Mark Fashehccd979b2005-12-15 14:31:24 -0800581bail:
582 return status;
583}
584
585/* Callers don't need to care which bitmap (local alloc or main) to
586 * use so we figure it out for them, but unfortunately this clutters
587 * things a bit. */
588int ocfs2_reserve_clusters(struct ocfs2_super *osb,
Mark Fashehccd979b2005-12-15 14:31:24 -0800589 u32 bits_wanted,
590 struct ocfs2_alloc_context **ac)
591{
592 int status;
593
594 mlog_entry_void();
595
Robert P. J. Daycd861282006-12-13 00:34:52 -0800596 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
Mark Fashehccd979b2005-12-15 14:31:24 -0800597 if (!(*ac)) {
598 status = -ENOMEM;
599 mlog_errno(status);
600 goto bail;
601 }
602
603 (*ac)->ac_bits_wanted = bits_wanted;
Mark Fashehccd979b2005-12-15 14:31:24 -0800604
605 status = -ENOSPC;
606 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
607 status = ocfs2_reserve_local_alloc_bits(osb,
Mark Fashehccd979b2005-12-15 14:31:24 -0800608 bits_wanted,
609 *ac);
610 if ((status < 0) && (status != -ENOSPC)) {
611 mlog_errno(status);
612 goto bail;
613 } else if (status == -ENOSPC) {
614 /* reserve_local_bits will return enospc with
615 * the local alloc inode still locked, so we
616 * can change this safely here. */
617 mlog(0, "Disabling local alloc\n");
618 /* We set to OCFS2_LA_DISABLED so that umount
619 * can clean up what's left of the local
620 * allocation */
621 osb->local_alloc_state = OCFS2_LA_DISABLED;
622 }
623 }
624
625 if (status == -ENOSPC) {
626 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
627 if (status < 0) {
628 if (status != -ENOSPC)
629 mlog_errno(status);
630 goto bail;
631 }
632 }
633
634 status = 0;
635bail:
636 if ((status < 0) && *ac) {
637 ocfs2_free_alloc_context(*ac);
638 *ac = NULL;
639 }
640
641 mlog_exit(status);
642 return status;
643}
644
645/*
646 * More or less lifted from ext3. I'll leave their description below:
647 *
648 * "For ext3 allocations, we must not reuse any blocks which are
649 * allocated in the bitmap buffer's "last committed data" copy. This
650 * prevents deletes from freeing up the page for reuse until we have
651 * committed the delete transaction.
652 *
653 * If we didn't do this, then deleting something and reallocating it as
654 * data would allow the old block to be overwritten before the
655 * transaction committed (because we force data to disk before commit).
656 * This would lead to corruption if we crashed between overwriting the
657 * data and committing the delete.
658 *
659 * @@@ We may want to make this allocation behaviour conditional on
660 * data-writes at some point, and disable it for metadata allocations or
661 * sync-data inodes."
662 *
663 * Note: OCFS2 already does this differently for metadata vs data
Joe Perchesc78bad12008-02-03 17:33:42 +0200664 * allocations, as those bitmaps are separate and undo access is never
Mark Fashehccd979b2005-12-15 14:31:24 -0800665 * called on a metadata group descriptor.
666 */
667static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
668 int nr)
669{
670 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
671
672 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
673 return 0;
674 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
675 return 1;
676
677 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
678 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
679}
680
681static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
682 struct buffer_head *bg_bh,
683 unsigned int bits_wanted,
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700684 unsigned int total_bits,
Mark Fashehccd979b2005-12-15 14:31:24 -0800685 u16 *bit_off,
686 u16 *bits_found)
687{
688 void *bitmap;
689 u16 best_offset, best_size;
690 int offset, start, found, status = 0;
691 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
692
693 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
694 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
695 return -EIO;
696 }
697
698 found = start = best_offset = best_size = 0;
699 bitmap = bg->bg_bitmap;
700
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700701 while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
702 if (offset == total_bits)
Mark Fashehccd979b2005-12-15 14:31:24 -0800703 break;
704
705 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
706 /* We found a zero, but we can't use it as it
707 * hasn't been put to disk yet! */
708 found = 0;
709 start = offset + 1;
710 } else if (offset == start) {
711 /* we found a zero */
712 found++;
713 /* move start to the next bit to test */
714 start++;
715 } else {
716 /* got a zero after some ones */
717 found = 1;
718 start = offset + 1;
719 }
720 if (found > best_size) {
721 best_size = found;
722 best_offset = start - found;
723 }
724 /* we got everything we needed */
725 if (found == bits_wanted) {
726 /* mlog(0, "Found it all!\n"); */
727 break;
728 }
729 }
730
731 /* XXX: I think the first clause is equivalent to the second
732 * - jlbec */
733 if (found == bits_wanted) {
734 *bit_off = start - found;
735 *bits_found = found;
736 } else if (best_size) {
737 *bit_off = best_offset;
738 *bits_found = best_size;
739 } else {
740 status = -ENOSPC;
741 /* No error log here -- see the comment above
742 * ocfs2_test_bg_bit_allocatable */
743 }
744
745 return status;
746}
747
Mark Fasheh1fabe142006-10-09 18:11:45 -0700748static inline int ocfs2_block_group_set_bits(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -0800749 struct inode *alloc_inode,
750 struct ocfs2_group_desc *bg,
751 struct buffer_head *group_bh,
752 unsigned int bit_off,
753 unsigned int num_bits)
754{
755 int status;
756 void *bitmap = bg->bg_bitmap;
757 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
758
759 mlog_entry_void();
760
761 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
762 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
763 status = -EIO;
764 goto bail;
765 }
766 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
767
768 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
769 num_bits);
770
771 if (ocfs2_is_cluster_bitmap(alloc_inode))
772 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
773
774 status = ocfs2_journal_access(handle,
775 alloc_inode,
776 group_bh,
777 journal_type);
778 if (status < 0) {
779 mlog_errno(status);
780 goto bail;
781 }
782
783 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
784
785 while(num_bits--)
786 ocfs2_set_bit(bit_off++, bitmap);
787
788 status = ocfs2_journal_dirty(handle,
789 group_bh);
790 if (status < 0) {
791 mlog_errno(status);
792 goto bail;
793 }
794
795bail:
796 mlog_exit(status);
797 return status;
798}
799
800/* find the one with the most empty bits */
801static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
802{
803 u16 curr, best;
804
805 BUG_ON(!cl->cl_next_free_rec);
806
807 best = curr = 0;
808 while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
809 if (le32_to_cpu(cl->cl_recs[curr].c_free) >
810 le32_to_cpu(cl->cl_recs[best].c_free))
811 best = curr;
812 curr++;
813 }
814
815 BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
816 return best;
817}
818
Mark Fasheh1fabe142006-10-09 18:11:45 -0700819static int ocfs2_relink_block_group(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -0800820 struct inode *alloc_inode,
821 struct buffer_head *fe_bh,
822 struct buffer_head *bg_bh,
823 struct buffer_head *prev_bg_bh,
824 u16 chain)
825{
826 int status;
827 /* there is a really tiny chance the journal calls could fail,
828 * but we wouldn't want inconsistent blocks in *any* case. */
829 u64 fe_ptr, bg_ptr, prev_bg_ptr;
830 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
831 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
832 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
833
834 if (!OCFS2_IS_VALID_DINODE(fe)) {
835 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
836 status = -EIO;
837 goto out;
838 }
839 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
840 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
841 status = -EIO;
842 goto out;
843 }
844 if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
845 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
846 status = -EIO;
847 goto out;
848 }
849
Mark Fashehb06970532006-03-03 10:24:33 -0800850 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
Mark Fasheh1ca1a112007-04-27 16:01:25 -0700851 (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
852 (unsigned long long)le64_to_cpu(bg->bg_blkno),
853 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -0800854
855 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
856 bg_ptr = le64_to_cpu(bg->bg_next_group);
857 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
858
859 status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
860 OCFS2_JOURNAL_ACCESS_WRITE);
861 if (status < 0) {
862 mlog_errno(status);
863 goto out_rollback;
864 }
865
866 prev_bg->bg_next_group = bg->bg_next_group;
867
868 status = ocfs2_journal_dirty(handle, prev_bg_bh);
869 if (status < 0) {
870 mlog_errno(status);
871 goto out_rollback;
872 }
873
874 status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
875 OCFS2_JOURNAL_ACCESS_WRITE);
876 if (status < 0) {
877 mlog_errno(status);
878 goto out_rollback;
879 }
880
881 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
882
883 status = ocfs2_journal_dirty(handle, bg_bh);
884 if (status < 0) {
885 mlog_errno(status);
886 goto out_rollback;
887 }
888
889 status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
890 OCFS2_JOURNAL_ACCESS_WRITE);
891 if (status < 0) {
892 mlog_errno(status);
893 goto out_rollback;
894 }
895
896 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
897
898 status = ocfs2_journal_dirty(handle, fe_bh);
899 if (status < 0) {
900 mlog_errno(status);
901 goto out_rollback;
902 }
903
904 status = 0;
905out_rollback:
906 if (status < 0) {
907 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
908 bg->bg_next_group = cpu_to_le64(bg_ptr);
909 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
910 }
911out:
912 mlog_exit(status);
913 return status;
914}
915
916static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
917 u32 wanted)
918{
919 return le16_to_cpu(bg->bg_free_bits_count) > wanted;
920}
921
922/* return 0 on success, -ENOSPC to keep searching and any other < 0
923 * value on error. */
924static int ocfs2_cluster_group_search(struct inode *inode,
925 struct buffer_head *group_bh,
926 u32 bits_wanted, u32 min_bits,
927 u16 *bit_off, u16 *bits_found)
928{
929 int search = -ENOSPC;
930 int ret;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700931 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fashehccd979b2005-12-15 14:31:24 -0800932 u16 tmp_off, tmp_found;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700933 unsigned int max_bits, gd_cluster_off;
Mark Fashehccd979b2005-12-15 14:31:24 -0800934
935 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
936
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700937 if (gd->bg_free_bits_count) {
938 max_bits = le16_to_cpu(gd->bg_bits);
939
940 /* Tail groups in cluster bitmaps which aren't cpg
941 * aligned are prone to partial extention by a failed
942 * fs resize. If the file system resize never got to
943 * update the dinode cluster count, then we don't want
944 * to trust any clusters past it, regardless of what
945 * the group descriptor says. */
946 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
947 le64_to_cpu(gd->bg_blkno));
948 if ((gd_cluster_off + max_bits) >
949 OCFS2_I(inode)->ip_clusters) {
950 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
951 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
952 (unsigned long long)le64_to_cpu(gd->bg_blkno),
953 le16_to_cpu(gd->bg_bits),
954 OCFS2_I(inode)->ip_clusters, max_bits);
955 }
956
Mark Fashehccd979b2005-12-15 14:31:24 -0800957 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
958 group_bh, bits_wanted,
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700959 max_bits,
Mark Fashehccd979b2005-12-15 14:31:24 -0800960 &tmp_off, &tmp_found);
961 if (ret)
962 return ret;
963
964 /* ocfs2_block_group_find_clear_bits() might
965 * return success, but we still want to return
966 * -ENOSPC unless it found the minimum number
967 * of bits. */
968 if (min_bits <= tmp_found) {
969 *bit_off = tmp_off;
970 *bits_found = tmp_found;
971 search = 0; /* success */
972 }
973 }
974
975 return search;
976}
977
978static int ocfs2_block_group_search(struct inode *inode,
979 struct buffer_head *group_bh,
980 u32 bits_wanted, u32 min_bits,
981 u16 *bit_off, u16 *bits_found)
982{
983 int ret = -ENOSPC;
984 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
985
986 BUG_ON(min_bits != 1);
987 BUG_ON(ocfs2_is_cluster_bitmap(inode));
988
989 if (bg->bg_free_bits_count)
990 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
991 group_bh, bits_wanted,
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700992 le16_to_cpu(bg->bg_bits),
Mark Fashehccd979b2005-12-15 14:31:24 -0800993 bit_off, bits_found);
994
995 return ret;
996}
997
Mark Fasheh883d4ca2006-06-05 16:41:00 -0400998static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
Mark Fasheh1fabe142006-10-09 18:11:45 -0700999 handle_t *handle,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001000 struct buffer_head *di_bh,
1001 u32 num_bits,
1002 u16 chain)
1003{
1004 int ret;
1005 u32 tmp_used;
1006 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1007 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1008
1009 ret = ocfs2_journal_access(handle, inode, di_bh,
1010 OCFS2_JOURNAL_ACCESS_WRITE);
1011 if (ret < 0) {
1012 mlog_errno(ret);
1013 goto out;
1014 }
1015
1016 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1017 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1018 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1019
1020 ret = ocfs2_journal_dirty(handle, di_bh);
1021 if (ret < 0)
1022 mlog_errno(ret);
1023
1024out:
1025 return ret;
1026}
1027
1028static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001029 handle_t *handle,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001030 u32 bits_wanted,
1031 u32 min_bits,
1032 u16 *bit_off,
1033 unsigned int *num_bits,
1034 u64 gd_blkno,
1035 u16 *bits_left)
1036{
1037 int ret;
1038 u16 found;
1039 struct buffer_head *group_bh = NULL;
1040 struct ocfs2_group_desc *gd;
1041 struct inode *alloc_inode = ac->ac_inode;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001042
1043 ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
1044 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1045 if (ret < 0) {
1046 mlog_errno(ret);
1047 return ret;
1048 }
1049
1050 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1051 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
1052 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
1053 ret = -EIO;
1054 goto out;
1055 }
1056
1057 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1058 bit_off, &found);
1059 if (ret < 0) {
1060 if (ret != -ENOSPC)
1061 mlog_errno(ret);
1062 goto out;
1063 }
1064
1065 *num_bits = found;
1066
1067 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1068 *num_bits,
1069 le16_to_cpu(gd->bg_chain));
1070 if (ret < 0) {
1071 mlog_errno(ret);
1072 goto out;
1073 }
1074
1075 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1076 *bit_off, *num_bits);
1077 if (ret < 0)
1078 mlog_errno(ret);
1079
1080 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1081
1082out:
1083 brelse(group_bh);
1084
1085 return ret;
1086}
1087
Mark Fashehccd979b2005-12-15 14:31:24 -08001088static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001089 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001090 u32 bits_wanted,
1091 u32 min_bits,
1092 u16 *bit_off,
1093 unsigned int *num_bits,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001094 u64 *bg_blkno,
1095 u16 *bits_left)
Mark Fashehccd979b2005-12-15 14:31:24 -08001096{
1097 int status;
1098 u16 chain, tmp_bits;
1099 u32 tmp_used;
1100 u64 next_group;
Mark Fashehccd979b2005-12-15 14:31:24 -08001101 struct inode *alloc_inode = ac->ac_inode;
1102 struct buffer_head *group_bh = NULL;
1103 struct buffer_head *prev_group_bh = NULL;
1104 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1105 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1106 struct ocfs2_group_desc *bg;
1107
1108 chain = ac->ac_chain;
Mark Fashehb06970532006-03-03 10:24:33 -08001109 mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
1110 bits_wanted, chain,
1111 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08001112
1113 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
1114 le64_to_cpu(cl->cl_recs[chain].c_blkno),
1115 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1116 if (status < 0) {
1117 mlog_errno(status);
1118 goto bail;
1119 }
1120 bg = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001121 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1122 if (status) {
1123 mlog_errno(status);
Mark Fashehccd979b2005-12-15 14:31:24 -08001124 goto bail;
1125 }
1126
1127 status = -ENOSPC;
1128 /* for now, the chain search is a bit simplistic. We just use
1129 * the 1st group with any empty bits. */
1130 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1131 bits_wanted, min_bits, bit_off,
1132 &tmp_bits)) == -ENOSPC) {
1133 if (!bg->bg_next_group)
1134 break;
1135
1136 if (prev_group_bh) {
1137 brelse(prev_group_bh);
1138 prev_group_bh = NULL;
1139 }
1140 next_group = le64_to_cpu(bg->bg_next_group);
1141 prev_group_bh = group_bh;
1142 group_bh = NULL;
1143 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
1144 next_group, &group_bh,
1145 OCFS2_BH_CACHED, alloc_inode);
1146 if (status < 0) {
1147 mlog_errno(status);
1148 goto bail;
1149 }
1150 bg = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001151 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1152 if (status) {
1153 mlog_errno(status);
Mark Fashehccd979b2005-12-15 14:31:24 -08001154 goto bail;
1155 }
1156 }
1157 if (status < 0) {
1158 if (status != -ENOSPC)
1159 mlog_errno(status);
1160 goto bail;
1161 }
1162
Mark Fashehb06970532006-03-03 10:24:33 -08001163 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
Mark Fasheh1ca1a112007-04-27 16:01:25 -07001164 tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -08001165
1166 *num_bits = tmp_bits;
1167
1168 BUG_ON(*num_bits == 0);
1169
1170 /*
1171 * Keep track of previous block descriptor read. When
1172 * we find a target, if we have read more than X
1173 * number of descriptors, and the target is reasonably
1174 * empty, relink him to top of his chain.
1175 *
1176 * We've read 0 extra blocks and only send one more to
1177 * the transaction, yet the next guy to search has a
1178 * much easier time.
1179 *
1180 * Do this *after* figuring out how many bits we're taking out
1181 * of our target group.
1182 */
1183 if (ac->ac_allow_chain_relink &&
1184 (prev_group_bh) &&
1185 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) {
1186 status = ocfs2_relink_block_group(handle, alloc_inode,
1187 ac->ac_bh, group_bh,
1188 prev_group_bh, chain);
1189 if (status < 0) {
1190 mlog_errno(status);
1191 goto bail;
1192 }
1193 }
1194
1195 /* Ok, claim our bits now: set the info on dinode, chainlist
1196 * and then the group */
1197 status = ocfs2_journal_access(handle,
1198 alloc_inode,
1199 ac->ac_bh,
1200 OCFS2_JOURNAL_ACCESS_WRITE);
1201 if (status < 0) {
1202 mlog_errno(status);
1203 goto bail;
1204 }
1205
1206 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1207 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
1208 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));
1209
1210 status = ocfs2_journal_dirty(handle,
1211 ac->ac_bh);
1212 if (status < 0) {
1213 mlog_errno(status);
1214 goto bail;
1215 }
1216
1217 status = ocfs2_block_group_set_bits(handle,
1218 alloc_inode,
1219 bg,
1220 group_bh,
1221 *bit_off,
1222 *num_bits);
1223 if (status < 0) {
1224 mlog_errno(status);
1225 goto bail;
1226 }
1227
Mark Fashehb06970532006-03-03 10:24:33 -08001228 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
Mark Fasheh1ca1a112007-04-27 16:01:25 -07001229 (unsigned long long)le64_to_cpu(fe->i_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -08001230
1231 *bg_blkno = le64_to_cpu(bg->bg_blkno);
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001232 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
Mark Fashehccd979b2005-12-15 14:31:24 -08001233bail:
1234 if (group_bh)
1235 brelse(group_bh);
1236 if (prev_group_bh)
1237 brelse(prev_group_bh);
1238
1239 mlog_exit(status);
1240 return status;
1241}
1242
1243/* will give out up to bits_wanted contiguous bits. */
1244static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1245 struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001246 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001247 u32 bits_wanted,
1248 u32 min_bits,
1249 u16 *bit_off,
1250 unsigned int *num_bits,
1251 u64 *bg_blkno)
1252{
1253 int status;
1254 u16 victim, i;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001255 u16 bits_left = 0;
1256 u64 hint_blkno = ac->ac_last_group;
Mark Fashehccd979b2005-12-15 14:31:24 -08001257 struct ocfs2_chain_list *cl;
1258 struct ocfs2_dinode *fe;
1259
1260 mlog_entry_void();
1261
1262 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1263 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1264 BUG_ON(!ac->ac_bh);
1265
1266 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1267 if (!OCFS2_IS_VALID_DINODE(fe)) {
1268 OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe);
1269 status = -EIO;
1270 goto bail;
1271 }
1272 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1273 le32_to_cpu(fe->id1.bitmap1.i_total)) {
Mark Fashehb06970532006-03-03 10:24:33 -08001274 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
1275 "bits but only %u total.",
1276 (unsigned long long)le64_to_cpu(fe->i_blkno),
Mark Fashehccd979b2005-12-15 14:31:24 -08001277 le32_to_cpu(fe->id1.bitmap1.i_used),
1278 le32_to_cpu(fe->id1.bitmap1.i_total));
1279 status = -EIO;
1280 goto bail;
1281 }
1282
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001283 if (hint_blkno) {
1284 /* Attempt to short-circuit the usual search mechanism
1285 * by jumping straight to the most recently used
1286 * allocation group. This helps us mantain some
1287 * contiguousness across allocations. */
Mark Fashehda5cbf22006-10-06 18:34:35 -07001288 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1289 min_bits, bit_off, num_bits,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001290 hint_blkno, &bits_left);
1291 if (!status) {
1292 /* Be careful to update *bg_blkno here as the
1293 * caller is expecting it to be filled in, and
1294 * ocfs2_search_one_group() won't do that for
1295 * us. */
1296 *bg_blkno = hint_blkno;
1297 goto set_hint;
1298 }
1299 if (status < 0 && status != -ENOSPC) {
1300 mlog_errno(status);
1301 goto bail;
1302 }
1303 }
1304
Mark Fashehccd979b2005-12-15 14:31:24 -08001305 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1306
1307 victim = ocfs2_find_victim_chain(cl);
1308 ac->ac_chain = victim;
1309 ac->ac_allow_chain_relink = 1;
1310
Mark Fashehda5cbf22006-10-06 18:34:35 -07001311 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001312 num_bits, bg_blkno, &bits_left);
Mark Fashehccd979b2005-12-15 14:31:24 -08001313 if (!status)
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001314 goto set_hint;
Mark Fashehccd979b2005-12-15 14:31:24 -08001315 if (status < 0 && status != -ENOSPC) {
1316 mlog_errno(status);
1317 goto bail;
1318 }
1319
1320 mlog(0, "Search of victim chain %u came up with nothing, "
1321 "trying all chains now.\n", victim);
1322
1323 /* If we didn't pick a good victim, then just default to
1324 * searching each chain in order. Don't allow chain relinking
1325 * because we only calculate enough journal credits for one
1326 * relink per alloc. */
1327 ac->ac_allow_chain_relink = 0;
1328 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1329 if (i == victim)
1330 continue;
1331 if (!cl->cl_recs[i].c_free)
1332 continue;
1333
1334 ac->ac_chain = i;
Mark Fashehda5cbf22006-10-06 18:34:35 -07001335 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001336 bit_off, num_bits, bg_blkno,
1337 &bits_left);
Mark Fashehccd979b2005-12-15 14:31:24 -08001338 if (!status)
1339 break;
1340 if (status < 0 && status != -ENOSPC) {
1341 mlog_errno(status);
1342 goto bail;
1343 }
1344 }
Mark Fashehccd979b2005-12-15 14:31:24 -08001345
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001346set_hint:
1347 if (status != -ENOSPC) {
1348 /* If the next search of this group is not likely to
1349 * yield a suitable extent, then we reset the last
1350 * group hint so as to not waste a disk read */
1351 if (bits_left < min_bits)
1352 ac->ac_last_group = 0;
1353 else
1354 ac->ac_last_group = *bg_blkno;
1355 }
1356
1357bail:
Mark Fashehccd979b2005-12-15 14:31:24 -08001358 mlog_exit(status);
1359 return status;
1360}
1361
1362int ocfs2_claim_metadata(struct ocfs2_super *osb,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001363 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001364 struct ocfs2_alloc_context *ac,
1365 u32 bits_wanted,
1366 u16 *suballoc_bit_start,
1367 unsigned int *num_bits,
1368 u64 *blkno_start)
1369{
1370 int status;
1371 u64 bg_blkno;
1372
1373 BUG_ON(!ac);
1374 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1375 BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
Mark Fashehccd979b2005-12-15 14:31:24 -08001376
1377 status = ocfs2_claim_suballoc_bits(osb,
1378 ac,
Mark Fashehda5cbf22006-10-06 18:34:35 -07001379 handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001380 bits_wanted,
1381 1,
1382 suballoc_bit_start,
1383 num_bits,
1384 &bg_blkno);
1385 if (status < 0) {
1386 mlog_errno(status);
1387 goto bail;
1388 }
1389 atomic_inc(&osb->alloc_stats.bg_allocs);
1390
1391 *blkno_start = bg_blkno + (u64) *suballoc_bit_start;
1392 ac->ac_bits_given += (*num_bits);
1393 status = 0;
1394bail:
1395 mlog_exit(status);
1396 return status;
1397}
1398
1399int ocfs2_claim_new_inode(struct ocfs2_super *osb,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001400 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001401 struct ocfs2_alloc_context *ac,
1402 u16 *suballoc_bit,
1403 u64 *fe_blkno)
1404{
1405 int status;
1406 unsigned int num_bits;
1407 u64 bg_blkno;
1408
1409 mlog_entry_void();
1410
1411 BUG_ON(!ac);
1412 BUG_ON(ac->ac_bits_given != 0);
1413 BUG_ON(ac->ac_bits_wanted != 1);
1414 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
Mark Fashehccd979b2005-12-15 14:31:24 -08001415
1416 status = ocfs2_claim_suballoc_bits(osb,
1417 ac,
Mark Fashehda5cbf22006-10-06 18:34:35 -07001418 handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001419 1,
1420 1,
1421 suballoc_bit,
1422 &num_bits,
1423 &bg_blkno);
1424 if (status < 0) {
1425 mlog_errno(status);
1426 goto bail;
1427 }
1428 atomic_inc(&osb->alloc_stats.bg_allocs);
1429
1430 BUG_ON(num_bits != 1);
1431
1432 *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
1433 ac->ac_bits_given++;
1434 status = 0;
1435bail:
1436 mlog_exit(status);
1437 return status;
1438}
1439
1440/* translate a group desc. blkno and it's bitmap offset into
1441 * disk cluster offset. */
1442static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
1443 u64 bg_blkno,
1444 u16 bg_bit_off)
1445{
1446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1447 u32 cluster = 0;
1448
1449 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1450
1451 if (bg_blkno != osb->first_cluster_group_blkno)
1452 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
1453 cluster += (u32) bg_bit_off;
1454 return cluster;
1455}
1456
1457/* given a cluster offset, calculate which block group it belongs to
1458 * and return that block offset. */
Tao Mad6590722007-12-18 15:47:03 +08001459u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
Mark Fashehccd979b2005-12-15 14:31:24 -08001460{
1461 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1462 u32 group_no;
1463
1464 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1465
1466 group_no = cluster / osb->bitmap_cpg;
1467 if (!group_no)
1468 return osb->first_cluster_group_blkno;
1469 return ocfs2_clusters_to_blocks(inode->i_sb,
1470 group_no * osb->bitmap_cpg);
1471}
1472
1473/* given the block number of a cluster start, calculate which cluster
1474 * group and descriptor bitmap offset that corresponds to. */
1475static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1476 u64 data_blkno,
1477 u64 *bg_blkno,
1478 u16 *bg_bit_off)
1479{
1480 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1481 u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
1482
1483 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1484
1485 *bg_blkno = ocfs2_which_cluster_group(inode,
1486 data_cluster);
1487
1488 if (*bg_blkno == osb->first_cluster_group_blkno)
1489 *bg_bit_off = (u16) data_cluster;
1490 else
1491 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
1492 data_blkno - *bg_blkno);
1493}
1494
1495/*
1496 * min_bits - minimum contiguous chunk from this total allocation we
1497 * can handle. set to what we asked for originally for a full
1498 * contig. allocation, set to '1' to indicate we can deal with extents
1499 * of any size.
1500 */
Mark Fasheh415cb802007-09-16 20:10:16 -07001501int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1502 handle_t *handle,
1503 struct ocfs2_alloc_context *ac,
1504 u32 min_clusters,
1505 u32 max_clusters,
1506 u32 *cluster_start,
1507 u32 *num_clusters)
Mark Fashehccd979b2005-12-15 14:31:24 -08001508{
1509 int status;
Mark Fasheh415cb802007-09-16 20:10:16 -07001510 unsigned int bits_wanted = max_clusters;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001511 u64 bg_blkno = 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08001512 u16 bg_bit_off;
1513
1514 mlog_entry_void();
1515
Mark Fashehccd979b2005-12-15 14:31:24 -08001516 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1517
1518 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
1519 && ac->ac_which != OCFS2_AC_USE_MAIN);
Mark Fashehccd979b2005-12-15 14:31:24 -08001520
1521 if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
1522 status = ocfs2_claim_local_alloc_bits(osb,
1523 handle,
1524 ac,
1525 bits_wanted,
1526 cluster_start,
1527 num_clusters);
1528 if (!status)
1529 atomic_inc(&osb->alloc_stats.local_data);
1530 } else {
1531 if (min_clusters > (osb->bitmap_cpg - 1)) {
1532 /* The only paths asking for contiguousness
1533 * should know about this already. */
Sunil Mushran2fbe8d12007-12-20 14:58:11 -08001534 mlog(ML_ERROR, "minimum allocation requested %u exceeds "
1535 "group bitmap size %u!\n", min_clusters,
1536 osb->bitmap_cpg);
Mark Fashehccd979b2005-12-15 14:31:24 -08001537 status = -ENOSPC;
1538 goto bail;
1539 }
1540 /* clamp the current request down to a realistic size. */
1541 if (bits_wanted > (osb->bitmap_cpg - 1))
1542 bits_wanted = osb->bitmap_cpg - 1;
1543
1544 status = ocfs2_claim_suballoc_bits(osb,
1545 ac,
Mark Fashehda5cbf22006-10-06 18:34:35 -07001546 handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001547 bits_wanted,
1548 min_clusters,
1549 &bg_bit_off,
1550 num_clusters,
1551 &bg_blkno);
1552 if (!status) {
1553 *cluster_start =
1554 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1555 bg_blkno,
1556 bg_bit_off);
1557 atomic_inc(&osb->alloc_stats.bitmap_data);
1558 }
1559 }
1560 if (status < 0) {
1561 if (status != -ENOSPC)
1562 mlog_errno(status);
1563 goto bail;
1564 }
1565
1566 ac->ac_bits_given += *num_clusters;
1567
1568bail:
1569 mlog_exit(status);
1570 return status;
1571}
1572
Mark Fasheh415cb802007-09-16 20:10:16 -07001573int ocfs2_claim_clusters(struct ocfs2_super *osb,
1574 handle_t *handle,
1575 struct ocfs2_alloc_context *ac,
1576 u32 min_clusters,
1577 u32 *cluster_start,
1578 u32 *num_clusters)
1579{
1580 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1581
1582 return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
1583 bits_wanted, cluster_start, num_clusters);
1584}
1585
Mark Fasheh1fabe142006-10-09 18:11:45 -07001586static inline int ocfs2_block_group_clear_bits(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001587 struct inode *alloc_inode,
1588 struct ocfs2_group_desc *bg,
1589 struct buffer_head *group_bh,
1590 unsigned int bit_off,
1591 unsigned int num_bits)
1592{
1593 int status;
1594 unsigned int tmp;
1595 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1596 struct ocfs2_group_desc *undo_bg = NULL;
1597
1598 mlog_entry_void();
1599
1600 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
1601 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
1602 status = -EIO;
1603 goto bail;
1604 }
1605
1606 mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1607
1608 if (ocfs2_is_cluster_bitmap(alloc_inode))
1609 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1610
1611 status = ocfs2_journal_access(handle, alloc_inode, group_bh,
1612 journal_type);
1613 if (status < 0) {
1614 mlog_errno(status);
1615 goto bail;
1616 }
1617
1618 if (ocfs2_is_cluster_bitmap(alloc_inode))
1619 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
1620
1621 tmp = num_bits;
1622 while(tmp--) {
1623 ocfs2_clear_bit((bit_off + tmp),
1624 (unsigned long *) bg->bg_bitmap);
1625 if (ocfs2_is_cluster_bitmap(alloc_inode))
1626 ocfs2_set_bit(bit_off + tmp,
1627 (unsigned long *) undo_bg->bg_bitmap);
1628 }
1629 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1630
1631 status = ocfs2_journal_dirty(handle, group_bh);
1632 if (status < 0)
1633 mlog_errno(status);
1634bail:
1635 return status;
1636}
1637
1638/*
1639 * expects the suballoc inode to already be locked.
1640 */
Mark Fasheh2b604352007-06-22 15:45:27 -07001641int ocfs2_free_suballoc_bits(handle_t *handle,
1642 struct inode *alloc_inode,
1643 struct buffer_head *alloc_bh,
1644 unsigned int start_bit,
1645 u64 bg_blkno,
1646 unsigned int count)
Mark Fashehccd979b2005-12-15 14:31:24 -08001647{
1648 int status = 0;
1649 u32 tmp_used;
1650 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
1651 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
1652 struct ocfs2_chain_list *cl = &fe->id2.i_chain;
1653 struct buffer_head *group_bh = NULL;
1654 struct ocfs2_group_desc *group;
1655
1656 mlog_entry_void();
1657
1658 if (!OCFS2_IS_VALID_DINODE(fe)) {
1659 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
1660 status = -EIO;
1661 goto bail;
1662 }
1663 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
1664
Mark Fashehb06970532006-03-03 10:24:33 -08001665 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
1666 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1667 (unsigned long long)bg_blkno, start_bit);
Mark Fashehccd979b2005-12-15 14:31:24 -08001668
1669 status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
1670 alloc_inode);
1671 if (status < 0) {
1672 mlog_errno(status);
1673 goto bail;
1674 }
1675
1676 group = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001677 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
1678 if (status) {
1679 mlog_errno(status);
Mark Fashehccd979b2005-12-15 14:31:24 -08001680 goto bail;
1681 }
1682 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
1683
1684 status = ocfs2_block_group_clear_bits(handle, alloc_inode,
1685 group, group_bh,
1686 start_bit, count);
1687 if (status < 0) {
1688 mlog_errno(status);
1689 goto bail;
1690 }
1691
1692 status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
1693 OCFS2_JOURNAL_ACCESS_WRITE);
1694 if (status < 0) {
1695 mlog_errno(status);
1696 goto bail;
1697 }
1698
1699 le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
1700 count);
1701 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1702 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
1703
1704 status = ocfs2_journal_dirty(handle, alloc_bh);
1705 if (status < 0) {
1706 mlog_errno(status);
1707 goto bail;
1708 }
1709
1710bail:
1711 if (group_bh)
1712 brelse(group_bh);
1713
1714 mlog_exit(status);
1715 return status;
1716}
1717
Mark Fasheh1fabe142006-10-09 18:11:45 -07001718int ocfs2_free_dinode(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001719 struct inode *inode_alloc_inode,
1720 struct buffer_head *inode_alloc_bh,
1721 struct ocfs2_dinode *di)
1722{
1723 u64 blk = le64_to_cpu(di->i_blkno);
1724 u16 bit = le16_to_cpu(di->i_suballoc_bit);
1725 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1726
1727 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
1728 inode_alloc_bh, bit, bg_blkno, 1);
1729}
1730
Mark Fasheh1fabe142006-10-09 18:11:45 -07001731int ocfs2_free_clusters(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001732 struct inode *bitmap_inode,
1733 struct buffer_head *bitmap_bh,
1734 u64 start_blk,
1735 unsigned int num_clusters)
1736{
1737 int status;
1738 u16 bg_start_bit;
1739 u64 bg_blkno;
1740 struct ocfs2_dinode *fe;
1741
1742 /* You can't ever have a contiguous set of clusters
1743 * bigger than a block group bitmap so we never have to worry
1744 * about looping on them. */
1745
1746 mlog_entry_void();
1747
1748 /* This is expensive. We can safely remove once this stuff has
1749 * gotten tested really well. */
1750 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
1751
1752 fe = (struct ocfs2_dinode *) bitmap_bh->b_data;
1753
1754 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
1755 &bg_start_bit);
1756
Mark Fashehb06970532006-03-03 10:24:33 -08001757 mlog(0, "want to free %u clusters starting at block %llu\n",
1758 num_clusters, (unsigned long long)start_blk);
1759 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
1760 (unsigned long long)bg_blkno, bg_start_bit);
Mark Fashehccd979b2005-12-15 14:31:24 -08001761
1762 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
1763 bg_start_bit, bg_blkno,
1764 num_clusters);
1765 if (status < 0)
1766 mlog_errno(status);
1767
1768 mlog_exit(status);
1769 return status;
1770}
1771
1772static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
1773{
1774 printk("Block Group:\n");
1775 printk("bg_signature: %s\n", bg->bg_signature);
1776 printk("bg_size: %u\n", bg->bg_size);
1777 printk("bg_bits: %u\n", bg->bg_bits);
1778 printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
1779 printk("bg_chain: %u\n", bg->bg_chain);
1780 printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
Mark Fashehb06970532006-03-03 10:24:33 -08001781 printk("bg_next_group: %llu\n",
1782 (unsigned long long)bg->bg_next_group);
1783 printk("bg_parent_dinode: %llu\n",
1784 (unsigned long long)bg->bg_parent_dinode);
1785 printk("bg_blkno: %llu\n",
1786 (unsigned long long)bg->bg_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08001787}
1788
1789static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
1790{
1791 int i;
1792
Mark Fashehb06970532006-03-03 10:24:33 -08001793 printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08001794 printk("i_signature: %s\n", fe->i_signature);
Mark Fashehb06970532006-03-03 10:24:33 -08001795 printk("i_size: %llu\n",
1796 (unsigned long long)fe->i_size);
Mark Fashehccd979b2005-12-15 14:31:24 -08001797 printk("i_clusters: %u\n", fe->i_clusters);
1798 printk("i_generation: %u\n",
1799 le32_to_cpu(fe->i_generation));
1800 printk("id1.bitmap1.i_used: %u\n",
1801 le32_to_cpu(fe->id1.bitmap1.i_used));
1802 printk("id1.bitmap1.i_total: %u\n",
1803 le32_to_cpu(fe->id1.bitmap1.i_total));
1804 printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg);
1805 printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc);
1806 printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count);
1807 printk("id2.i_chain.cl_next_free_rec: %u\n",
1808 fe->id2.i_chain.cl_next_free_rec);
1809 for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
1810 printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i,
1811 fe->id2.i_chain.cl_recs[i].c_free);
1812 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
1813 fe->id2.i_chain.cl_recs[i].c_total);
Mark Fashehb06970532006-03-03 10:24:33 -08001814 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
1815 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08001816 }
1817}