bitmap.c 39.6 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6
/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */
/* Reiserfs block (de)allocator, bitmap-based. */

#include <linux/time.h>
7
#include "reiserfs.h"
Linus Torvalds's avatar
Linus Torvalds committed
8 9 10 11
#include <linux/errno.h>
#include <linux/buffer_head.h>
#include <linux/kernel.h>
#include <linux/pagemap.h>
12
#include <linux/vmalloc.h>
Linus Torvalds's avatar
Linus Torvalds committed
13
#include <linux/quotaops.h>
14
#include <linux/seq_file.h>
Linus Torvalds's avatar
Linus Torvalds committed
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41

#define PREALLOCATION_SIZE 9

/* different reiserfs block allocator options */

#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)

#define  _ALLOC_concentrating_formatted_nodes 0
#define  _ALLOC_displacing_large_files 1
#define  _ALLOC_displacing_new_packing_localities 2
#define  _ALLOC_old_hashed_relocation 3
#define  _ALLOC_new_hashed_relocation 4
#define  _ALLOC_skip_busy 5
#define  _ALLOC_displace_based_on_dirid 6
#define  _ALLOC_hashed_formatted_nodes 7
#define  _ALLOC_old_way 8
#define  _ALLOC_hundredth_slices 9
#define  _ALLOC_dirid_groups 10
#define  _ALLOC_oid_groups 11
#define  _ALLOC_packing_groups 12

#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))

#define SET_OPTION(optname) \
   do { \
42 43
	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
Linus Torvalds's avatar
Linus Torvalds committed
44 45 46 47
    } while(0)
#define TEST_OPTION(optname, s) \
    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))

48
static inline void get_bit_address(struct super_block *s,
49 50 51
				   b_blocknr_t block,
				   unsigned int *bmap_nr,
				   unsigned int *offset)
Linus Torvalds's avatar
Linus Torvalds committed
52
{
53 54 55 56
	/*
	 * It is in the bitmap block number equal to the block
	 * number divided by the number of bits in a block.
	 */
57
	*bmap_nr = block >> (s->s_blocksize_bits + 3);
58 59
	/* Within that bitmap block it is located at bit offset *offset. */
	*offset = block & ((s->s_blocksize << 3) - 1);
Linus Torvalds's avatar
Linus Torvalds committed
60 61
}

62
int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
Linus Torvalds's avatar
Linus Torvalds committed
63
{
64
	unsigned int bmap, offset;
65
	unsigned int bmap_count = reiserfs_bmap_count(s);
Linus Torvalds's avatar
Linus Torvalds committed
66

67
	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
68 69 70
		reiserfs_error(s, "vs-4010",
			       "block number is out of range %lu (%u)",
			       block, SB_BLOCK_COUNT(s));
71
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
72 73
	}

74 75
	get_bit_address(s, block, &bmap, &offset);

76 77 78 79
	/*
	 * Old format filesystem? Unlikely, but the bitmaps are all
	 * up front so we need to account for it.
	 */
80
	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
81
			      &REISERFS_SB(s)->s_properties))) {
82
		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
83 84
		if (block >= bmap1 &&
		    block <= bmap1 + bmap_count) {
85 86 87
			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
				       "can't be freed or reused",
				       block, bmap_count);
88 89 90 91
			return 0;
		}
	} else {
		if (offset == 0) {
92 93 94
			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
				       "can't be freed or reused",
				       block, bmap_count);
95 96
			return 0;
		}
97
	}
Linus Torvalds's avatar
Linus Torvalds committed
98

99
	if (bmap >= bmap_count) {
100 101 102
		reiserfs_error(s, "vs-4030", "bitmap for requested block "
			       "is out of range: block=%lu, bitmap_nr=%u",
			       block, bmap);
103 104
		return 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
105

106
	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
107 108
		reiserfs_error(s, "vs-4050", "this is root block (%u), "
			       "it must be busy", SB_ROOT_BLOCK(s));
109 110 111 112
		return 0;
	}

	return 1;
Linus Torvalds's avatar
Linus Torvalds committed
113 114
}

115 116 117 118 119
/*
 * Searches in journal structures for a given block number (bmap, off).
 * If block is found in reiserfs journal it suggests next free block
 * candidate to test.
 */
120 121
static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
				      int off, int *next)
Linus Torvalds's avatar
Linus Torvalds committed
122
{
123 124 125 126 127 128 129
	b_blocknr_t tmp;

	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
		if (tmp) {	/* hint supplied */
			*next = tmp;
			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
		} else {
130
			(*next) = off + 1;  /* inc offset to avoid looping. */
131 132 133 134
			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
		}
		PROC_INFO_INC(s, scan_bitmap.retry);
		return 1;
Linus Torvalds's avatar
Linus Torvalds committed
135
	}
136
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
137 138
}

139 140 141 142
/*
 * Searches for a window of zero bits with given minimum and maximum
 * lengths in one bitmap block
 */
143
static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
144 145
			     unsigned int bmap_n, int *beg, int boundary,
			     int min, int max, int unfm)
Linus Torvalds's avatar
Linus Torvalds committed
146
{
147 148
	struct super_block *s = th->t_super;
	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
149
	struct buffer_head *bh;
150 151
	int end, next;
	int org = *beg;
Linus Torvalds's avatar
Linus Torvalds committed
152

153
	BUG_ON(!th->t_trans_id);
154 155
	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
156
	PROC_INFO_INC(s, scan_bitmap.bmap);
Linus Torvalds's avatar
Linus Torvalds committed
157

158
	if (!bi) {
159 160
		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
			       "for bitmap %d", bmap_n);
161 162
		return 0;
	}
163

164 165 166
	bh = reiserfs_read_bitmap_block(s, bmap_n);
	if (bh == NULL)
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
167

168
	while (1) {
169
cont:
170 171
		if (bi->free_count < min) {
			brelse(bh);
172
			return 0;	/* No free blocks in this bitmap */
173
		}
174

175
		/* search for a first zero bit -- beginning of a window */
176
		*beg = reiserfs_find_next_zero_le_bit
177
		    ((unsigned long *)(bh->b_data), boundary, *beg);
178

179 180 181 182 183
		/*
		 * search for a zero bit fails or the rest of bitmap block
		 * cannot contain a zero window of minimum size
		 */
		if (*beg + min > boundary) {
184
			brelse(bh);
185
			return 0;
Linus Torvalds's avatar
Linus Torvalds committed
186 187
		}

188 189 190 191 192
		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
			continue;
		/* first zero bit found; we check next bits */
		for (end = *beg + 1;; end++) {
			if (end >= *beg + max || end >= boundary
193
			    || reiserfs_test_le_bit(end, bh->b_data)) {
194 195 196
				next = end;
				break;
			}
197 198 199 200 201 202

			/*
			 * finding the other end of zero bit window requires
			 * looking into journal structures (in case of
			 * searching for free blocks for unformatted nodes)
			 */
203 204 205
			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
				break;
		}
Linus Torvalds's avatar
Linus Torvalds committed
206

207 208 209 210 211 212 213
		/*
		 * now (*beg) points to beginning of zero bits window,
		 * (end) points to one bit after the window end
		 */

		/* found window of proper size */
		if (end - *beg >= min) {
214
			int i;
215
			reiserfs_prepare_for_journal(s, bh, 1);
216 217 218 219
			/*
			 * try to set all blocks used checking are
			 * they still free
			 */
220
			for (i = *beg; i < end; i++) {
221
				/* Don't check in journal again. */
222
				if (reiserfs_test_and_set_le_bit
223
				    (i, bh->b_data)) {
224 225 226 227
					/*
					 * bit was set by another process while
					 * we slept in prepare_for_journal()
					 */
228
					PROC_INFO_INC(s, scan_bitmap.stolen);
229 230 231 232 233 234 235

					/*
					 * we can continue with smaller set
					 * of allocated blocks, if length of
					 * this set is more or equal to `min'
					 */
					if (i >= *beg + min) {
236 237 238
						end = i;
						break;
					}
239 240 241 242 243

					/*
					 * otherwise we clear all bit
					 * were set ...
					 */
244
					while (--i >= *beg)
245
						reiserfs_clear_le_bit
246 247
						    (i, bh->b_data);
					reiserfs_restore_prepared_buffer(s, bh);
248
					*beg = org;
249 250 251 252 253

					/*
					 * Search again in current block
					 * from beginning
					 */
254 255 256 257
					goto cont;
				}
			}
			bi->free_count -= (end - *beg);
258
			journal_mark_dirty(th, bh);
259
			brelse(bh);
260 261 262 263 264

			/* free block count calculation */
			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
						     1);
			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
265
			journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
266 267 268 269 270

			return end - (*beg);
		} else {
			*beg = next;
		}
Linus Torvalds's avatar
Linus Torvalds committed
271 272 273
	}
}

274 275 276 277 278 279 280 281 282 283 284
static int bmap_hash_id(struct super_block *s, u32 id)
{
	char *hash_in = NULL;
	unsigned long hash;
	unsigned bm;

	if (id <= 2) {
		bm = 1;
	} else {
		hash_in = (char *)(&id);
		hash = keyed_hash(hash_in, 4);
285
		bm = hash % reiserfs_bmap_count(s);
286 287 288 289
		if (!bm)
			bm = 1;
	}
	/* this can only be true when SB_BMAP_NR = 1 */
290
	if (bm >= reiserfs_bmap_count(s))
291 292
		bm = 0;
	return bm;
Linus Torvalds's avatar
Linus Torvalds committed
293 294 295 296 297 298
}

/*
 * hashes the id and then returns > 0 if the block group for the
 * corresponding hash is full
 */
299 300
static inline int block_group_used(struct super_block *s, u32 id)
{
301 302 303
	int bm = bmap_hash_id(s, id);
	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];

304 305
	/*
	 * If we don't have cached information on this bitmap block, we're
306
	 * going to have to load it later anyway. Loading it here allows us
Joe Perches's avatar
Joe Perches committed
307
	 * to make a better decision. This favors long-term performance gain
308
	 * with a better on-disk layout vs. a short term gain of skipping the
309 310
	 * read and potentially having a bad placement.
	 */
311
	if (info->free_count == UINT_MAX) {
312 313 314 315 316
		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
		brelse(bh);
	}

	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
317 318 319
		return 0;
	}
	return 1;
Linus Torvalds's avatar
Linus Torvalds committed
320 321 322 323 324
}

/*
 * the packing is returned in disk byte order
 */
325
__le32 reiserfs_choose_packing(struct inode * dir)
326
{
327 328 329 330 331 332 333 334 335 336 337 338 339 340
	__le32 packing;
	if (TEST_OPTION(packing_groups, dir->i_sb)) {
		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
		/*
		 * some versions of reiserfsck expect packing locality 1 to be
		 * special
		 */
		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
			packing = INODE_PKEY(dir)->k_objectid;
		else
			packing = INODE_PKEY(dir)->k_dir_id;
	} else
		packing = INODE_PKEY(dir)->k_objectid;
	return packing;
Linus Torvalds's avatar
Linus Torvalds committed
341
}
342

343 344 345 346
/*
 * Tries to find contiguous zero bit window (given size) in given region of
 * bitmap and place new blocks there. Returns number of allocated blocks.
 */
347 348
static int scan_bitmap(struct reiserfs_transaction_handle *th,
		       b_blocknr_t * start, b_blocknr_t finish,
349
		       int min, int max, int unfm, sector_t file_block)
Linus Torvalds's avatar
Linus Torvalds committed
350
{
351 352
	int nr_allocated = 0;
	struct super_block *s = th->t_super;
353 354 355
	unsigned int bm, off;
	unsigned int end_bm, end_off;
	unsigned int off_max = s->s_blocksize << 3;
356 357 358

	BUG_ON(!th->t_trans_id);
	PROC_INFO_INC(s, scan_bitmap.call);
359 360

	/* No point in looking for more free blocks */
361
	if (SB_FREE_BLOCKS(s) <= 0)
362
		return 0;
363 364 365

	get_bit_address(s, *start, &bm, &off);
	get_bit_address(s, finish, &end_bm, &end_off);
366
	if (bm > reiserfs_bmap_count(s))
367
		return 0;
368 369
	if (end_bm > reiserfs_bmap_count(s))
		end_bm = reiserfs_bmap_count(s);
370

371 372
	/*
	 * When the bitmap is more than 10% free, anyone can allocate.
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409
	 * When it's less than 10% free, only files that already use the
	 * bitmap are allowed. Once we pass 80% full, this restriction
	 * is lifted.
	 *
	 * We do this so that files that grow later still have space close to
	 * their original allocation. This improves locality, and presumably
	 * performance as a result.
	 *
	 * This is only an allocation policy and does not make up for getting a
	 * bad hint. Decent hinting must be implemented for this to work well.
	 */
	if (TEST_OPTION(skip_busy, s)
	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
		for (; bm < end_bm; bm++, off = 0) {
			if ((off && (!unfm || (file_block != 0)))
			    || SB_AP_BITMAP(s)[bm].free_count >
			    (s->s_blocksize << 3) / 10)
				nr_allocated =
				    scan_bitmap_block(th, bm, &off, off_max,
						      min, max, unfm);
			if (nr_allocated)
				goto ret;
		}
		/* we know from above that start is a reasonable number */
		get_bit_address(s, *start, &bm, &off);
	}

	for (; bm < end_bm; bm++, off = 0) {
		nr_allocated =
		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
		if (nr_allocated)
			goto ret;
	}

	nr_allocated =
	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);

410
ret:
411 412
	*start = bm * off_max + off;
	return nr_allocated;
Linus Torvalds's avatar
Linus Torvalds committed
413 414 415

}

416 417 418
static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
				 struct inode *inode, b_blocknr_t block,
				 int for_unformatted)
Linus Torvalds's avatar
Linus Torvalds committed
419
{
420 421
	struct super_block *s = th->t_super;
	struct reiserfs_super_block *rs;
422
	struct buffer_head *sbh, *bmbh;
423
	struct reiserfs_bitmap_info *apbi;
424
	unsigned int nr, offset;
Linus Torvalds's avatar
Linus Torvalds committed
425

426 427 428 429 430
	BUG_ON(!th->t_trans_id);
	PROC_INFO_INC(s, free_block);
	rs = SB_DISK_SUPER_BLOCK(s);
	sbh = SB_BUFFER_WITH_SB(s);
	apbi = SB_AP_BITMAP(s);
Linus Torvalds's avatar
Linus Torvalds committed
431

432
	get_bit_address(s, block, &nr, &offset);
Linus Torvalds's avatar
Linus Torvalds committed
433

434
	if (nr >= reiserfs_bmap_count(s)) {
435 436
		reiserfs_error(s, "vs-4075", "block %lu is out of range",
			       block);
437 438 439
		return;
	}

440 441 442
	bmbh = reiserfs_read_bitmap_block(s, nr);
	if (!bmbh)
		return;
443 444

	reiserfs_prepare_for_journal(s, bmbh, 1);
445 446

	/* clear bit for the given block in bit map */
447
	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
448 449
		reiserfs_error(s, "vs-4080",
			       "block %lu: bit already cleared", block);
450 451
	}
	apbi[nr].free_count++;
452
	journal_mark_dirty(th, bmbh);
453
	brelse(bmbh);
454 455 456 457 458

	reiserfs_prepare_for_journal(s, sbh, 1);
	/* update super block */
	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);

459
	journal_mark_dirty(th, sbh);
460 461
	if (for_unformatted) {
		int depth = reiserfs_write_unlock_nested(s);
462
		dquot_free_block_nodirty(inode, 1);
463 464
		reiserfs_write_lock_nested(s, depth);
	}
Linus Torvalds's avatar
Linus Torvalds committed
465 466
}

467 468 469
void reiserfs_free_block(struct reiserfs_transaction_handle *th,
			 struct inode *inode, b_blocknr_t block,
			 int for_unformatted)
Linus Torvalds's avatar
Linus Torvalds committed
470
{
471
	struct super_block *s = th->t_super;
Linus Torvalds's avatar
Linus Torvalds committed
472

473
	BUG_ON(!th->t_trans_id);
474
	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
475 476 477 478
	if (!is_reusable(s, block, 1))
		return;

	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
479
		reiserfs_error(th->t_super, "bitmap-4072",
480 481 482 483 484
			       "Trying to free block outside file system "
			       "boundaries (%lu > %lu)",
			       block, sb_block_count(REISERFS_SB(s)->s_rs));
		return;
	}
485 486 487
	/* mark it before we clear it, just in case */
	journal_mark_freed(th, s, block);
	_reiserfs_free_block(th, inode, block, for_unformatted);
Linus Torvalds's avatar
Linus Torvalds committed
488 489 490
}

/* preallocated blocks don't need to be run through journal_mark_freed */
491 492 493
static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
					 struct inode *inode, b_blocknr_t block)
{
494
	BUG_ON(!th->t_trans_id);
495 496
	RFALSE(!th->t_super,
	       "vs-4060: trying to free block on nonexistent device");
497 498
	if (!is_reusable(th->t_super, block, 1))
		return;
499
	_reiserfs_free_block(th, inode, block, 1);
Linus Torvalds's avatar
Linus Torvalds committed
500 501
}

502 503
static void __discard_prealloc(struct reiserfs_transaction_handle *th,
			       struct reiserfs_inode_info *ei)
Linus Torvalds's avatar
Linus Torvalds committed
504
{
505 506 507
	unsigned long save = ei->i_prealloc_block;
	int dirty = 0;
	struct inode *inode = &ei->vfs_inode;
508

509
	BUG_ON(!th->t_trans_id);
Linus Torvalds's avatar
Linus Torvalds committed
510
#ifdef CONFIG_REISERFS_CHECK
511
	if (ei->i_prealloc_count < 0)
512 513
		reiserfs_error(th->t_super, "zam-4001",
			       "inode has negative prealloc blocks count.");
Linus Torvalds's avatar
Linus Torvalds committed
514
#endif
515
	while (ei->i_prealloc_count > 0) {
516 517 518 519 520 521 522 523 524
		b_blocknr_t block_to_free;

		/*
		 * reiserfs_free_prealloc_block can drop the write lock,
		 * which could allow another caller to free the same block.
		 * We can protect against it by modifying the prealloc
		 * state before calling it.
		 */
		block_to_free = ei->i_prealloc_block++;
525
		ei->i_prealloc_count--;
526
		reiserfs_free_prealloc_block(th, inode, block_to_free);
527 528 529 530 531
		dirty = 1;
	}
	if (dirty)
		reiserfs_update_sd(th, inode);
	ei->i_prealloc_block = save;
532
	list_del_init(&ei->i_prealloc_list);
Linus Torvalds's avatar
Linus Torvalds committed
533 534 535
}

/* FIXME: It should be inline function */
536 537
void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
			       struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
538
{
539
	struct reiserfs_inode_info *ei = REISERFS_I(inode);
540

541 542 543
	BUG_ON(!th->t_trans_id);
	if (ei->i_prealloc_count)
		__discard_prealloc(th, ei);
Linus Torvalds's avatar
Linus Torvalds committed
544 545
}

546
void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
Linus Torvalds's avatar
Linus Torvalds committed
547
{
548
	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
Linus Torvalds's avatar
Linus Torvalds committed
549

550 551 552 553 554
	BUG_ON(!th->t_trans_id);
	while (!list_empty(plist)) {
		struct reiserfs_inode_info *ei;
		ei = list_entry(plist->next, struct reiserfs_inode_info,
				i_prealloc_list);
Linus Torvalds's avatar
Linus Torvalds committed
555
#ifdef CONFIG_REISERFS_CHECK
556
		if (!ei->i_prealloc_count) {
557 558 559
			reiserfs_error(th->t_super, "zam-4001",
				       "inode is in prealloc list but has "
				       "no preallocated blocks.");
560
		}
Linus Torvalds's avatar
Linus Torvalds committed
561
#endif
562 563
		__discard_prealloc(th, ei);
	}
Linus Torvalds's avatar
Linus Torvalds committed
564 565
}

566
void reiserfs_init_alloc_options(struct super_block *s)
Linus Torvalds's avatar
Linus Torvalds committed
567
{
568 569 570
	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
Linus Torvalds's avatar
Linus Torvalds committed
571 572 573
}

/* block allocator related options are parsed here */
574
int reiserfs_parse_alloc_options(struct super_block *s, char *options)
Linus Torvalds's avatar
Linus Torvalds committed
575
{
576 577
	char *this_char, *value;

578 579
	/* clear default settings */
	REISERFS_SB(s)->s_alloc_options.bits = 0;
580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608

	while ((this_char = strsep(&options, ":")) != NULL) {
		if ((value = strchr(this_char, '=')) != NULL)
			*value++ = 0;

		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
			int temp;
			SET_OPTION(concentrating_formatted_nodes);
			temp = (value
				&& *value) ? simple_strtoul(value, &value,
							    0) : 10;
			if (temp <= 0 || temp > 100) {
				REISERFS_SB(s)->s_alloc_options.border = 10;
			} else {
				REISERFS_SB(s)->s_alloc_options.border =
				    100 / temp;
			}
			continue;
		}
		if (!strcmp(this_char, "displacing_large_files")) {
			SET_OPTION(displacing_large_files);
			REISERFS_SB(s)->s_alloc_options.large_file_size =
			    (value
			     && *value) ? simple_strtoul(value, &value, 0) : 16;
			continue;
		}
		if (!strcmp(this_char, "displacing_new_packing_localities")) {
			SET_OPTION(displacing_new_packing_localities);
			continue;
609
		}
610 611 612 613 614

		if (!strcmp(this_char, "old_hashed_relocation")) {
			SET_OPTION(old_hashed_relocation);
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
615

616 617 618 619
		if (!strcmp(this_char, "new_hashed_relocation")) {
			SET_OPTION(new_hashed_relocation);
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
620

621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
		if (!strcmp(this_char, "dirid_groups")) {
			SET_OPTION(dirid_groups);
			continue;
		}
		if (!strcmp(this_char, "oid_groups")) {
			SET_OPTION(oid_groups);
			continue;
		}
		if (!strcmp(this_char, "packing_groups")) {
			SET_OPTION(packing_groups);
			continue;
		}
		if (!strcmp(this_char, "hashed_formatted_nodes")) {
			SET_OPTION(hashed_formatted_nodes);
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
637

638 639 640 641
		if (!strcmp(this_char, "skip_busy")) {
			SET_OPTION(skip_busy);
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
642

643 644 645 646
		if (!strcmp(this_char, "hundredth_slices")) {
			SET_OPTION(hundredth_slices);
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
647

648 649 650 651
		if (!strcmp(this_char, "old_way")) {
			SET_OPTION(old_way);
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
652

653 654 655 656
		if (!strcmp(this_char, "displace_based_on_dirid")) {
			SET_OPTION(displace_based_on_dirid);
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
657

658 659 660 661 662 663 664 665 666 667 668 669 670 671 672
		if (!strcmp(this_char, "preallocmin")) {
			REISERFS_SB(s)->s_alloc_options.preallocmin =
			    (value
			     && *value) ? simple_strtoul(value, &value, 0) : 4;
			continue;
		}

		if (!strcmp(this_char, "preallocsize")) {
			REISERFS_SB(s)->s_alloc_options.preallocsize =
			    (value
			     && *value) ? simple_strtoul(value, &value,
							 0) :
			    PREALLOCATION_SIZE;
			continue;
		}
Linus Torvalds's avatar
Linus Torvalds committed
673

674 675
		reiserfs_warning(s, "zam-4001", "unknown option - %s",
				 this_char);
676
		return 1;
Linus Torvalds's avatar
Linus Torvalds committed
677 678
	}

679
	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
680
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
681
}
682

683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772
static void print_sep(struct seq_file *seq, int *first)
{
	if (!*first)
		seq_puts(seq, ":");
	else
		*first = 0;
}

void show_alloc_options(struct seq_file *seq, struct super_block *s)
{
	int first = 1;

	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
		return;

	seq_puts(seq, ",alloc=");

	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
		print_sep(seq, &first);
		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
			seq_printf(seq, "concentrating_formatted_nodes=%d",
				100 / REISERFS_SB(s)->s_alloc_options.border);
		} else
			seq_puts(seq, "concentrating_formatted_nodes");
	}
	if (TEST_OPTION(displacing_large_files, s)) {
		print_sep(seq, &first);
		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
			seq_printf(seq, "displacing_large_files=%lu",
			    REISERFS_SB(s)->s_alloc_options.large_file_size);
		} else
			seq_puts(seq, "displacing_large_files");
	}
	if (TEST_OPTION(displacing_new_packing_localities, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "displacing_new_packing_localities");
	}
	if (TEST_OPTION(old_hashed_relocation, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "old_hashed_relocation");
	}
	if (TEST_OPTION(new_hashed_relocation, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "new_hashed_relocation");
	}
	if (TEST_OPTION(dirid_groups, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "dirid_groups");
	}
	if (TEST_OPTION(oid_groups, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "oid_groups");
	}
	if (TEST_OPTION(packing_groups, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "packing_groups");
	}
	if (TEST_OPTION(hashed_formatted_nodes, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "hashed_formatted_nodes");
	}
	if (TEST_OPTION(skip_busy, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "skip_busy");
	}
	if (TEST_OPTION(hundredth_slices, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "hundredth_slices");
	}
	if (TEST_OPTION(old_way, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "old_way");
	}
	if (TEST_OPTION(displace_based_on_dirid, s)) {
		print_sep(seq, &first);
		seq_puts(seq, "displace_based_on_dirid");
	}
	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
		print_sep(seq, &first);
		seq_printf(seq, "preallocmin=%d",
				REISERFS_SB(s)->s_alloc_options.preallocmin);
	}
	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
		print_sep(seq, &first);
		seq_printf(seq, "preallocsize=%d",
				REISERFS_SB(s)->s_alloc_options.preallocsize);
	}
}

773
static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
774
{
775
	char *hash_in;
776

777 778 779 780
	if (hint->formatted_node) {
		hash_in = (char *)&hint->key.k_dir_id;
	} else {
		if (!hint->inode) {
781
			/*hint->search_start = hint->beg;*/
782 783 784 785 786 787 788 789
			hash_in = (char *)&hint->key.k_dir_id;
		} else
		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
		else
			hash_in =
			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
	}
Linus Torvalds's avatar
Linus Torvalds committed
790

791 792
	hint->search_start =
	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
Linus Torvalds's avatar
Linus Torvalds committed
793 794 795 796
}

/*
 * Relocation based on dirid, hashing them into a given bitmap block
Joe Perches's avatar
Joe Perches committed
797
 * files. Formatted nodes are unaffected, a separate policy covers them
Linus Torvalds's avatar
Linus Torvalds committed
798
 */
799
static void dirid_groups(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
800
{
801 802 803 804
	unsigned long hash;
	__u32 dirid = 0;
	int bm = 0;
	struct super_block *sb = hint->th->t_super;
805

Linus Torvalds's avatar
Linus Torvalds committed
806
	if (hint->inode)
807 808 809 810 811 812 813 814 815 816 817 818
		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
	else if (hint->formatted_node)
		dirid = hint->key.k_dir_id;

	if (dirid) {
		bm = bmap_hash_id(sb, dirid);
		hash = bm * (sb->s_blocksize << 3);
		/* give a portion of the block group to metadata */
		if (hint->inode)
			hash += sb->s_blocksize / 2;
		hint->search_start = hash;
	}
Linus Torvalds's avatar
Linus Torvalds committed
819 820 821 822
}

/*
 * Relocation based on oid, hashing them into a given bitmap block
Joe Perches's avatar
Joe Perches committed
823
 * files. Formatted nodes are unaffected, a separate policy covers them
Linus Torvalds's avatar
Linus Torvalds committed
824
 */
825
static void oid_groups(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
826
{
827 828 829 830 831 832 833 834
	if (hint->inode) {
		unsigned long hash;
		__u32 oid;
		__u32 dirid;
		int bm;

		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);

835 836
		/*
		 * keep the root dir and it's first set of subdirs close to
837 838 839 840 841 842 843 844 845 846
		 * the start of the disk
		 */
		if (dirid <= 2)
			hash = (hint->inode->i_sb->s_blocksize << 3);
		else {
			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
			bm = bmap_hash_id(hint->inode->i_sb, oid);
			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
		}
		hint->search_start = hash;
Linus Torvalds's avatar
Linus Torvalds committed
847 848 849
	}
}

850 851
/*
 * returns 1 if it finds an indirect item and gets valid hint info
Linus Torvalds's avatar
Linus Torvalds committed
852 853
 * from it, otherwise 0
 */
854
static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
855
{
856
	struct treepath *path;
857 858 859 860 861 862
	struct buffer_head *bh;
	struct item_head *ih;
	int pos_in_item;
	__le32 *item;
	int ret = 0;

863 864 865 866 867
	/*
	 * reiserfs code can call this function w/o pointer to path
	 * structure supplied; then we rely on supplied search_start
	 */
	if (!hint->path)
868 869 870 871 872
		return 0;

	path = hint->path;
	bh = get_last_bh(path);
	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
873
	ih = tp_item_head(path);
874
	pos_in_item = path->pos_in_item;
875
	item = tp_item_body(path);
876 877 878

	hint->search_start = bh->b_blocknr;

879 880 881 882
	/*
	 * for indirect item: go to left and look for the first non-hole entry
	 * in the indirect item
	 */
883 884 885 886 887 888 889 890 891 892 893 894
	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
		if (pos_in_item == I_UNFM_NUM(ih))
			pos_in_item--;
		while (pos_in_item >= 0) {
			int t = get_block_num(item, pos_in_item);
			if (t) {
				hint->search_start = t;
				ret = 1;
				break;
			}
			pos_in_item--;
		}
Linus Torvalds's avatar
Linus Torvalds committed
895 896
	}

897 898
	/* does result value fit into specified region? */
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
899 900
}

901 902 903 904 905 906
/*
 * should be, if formatted node, then try to put on first part of the device
 * specified as number of percent with mount option device, else try to put
 * on last of device.  This is not to say it is good code to do so,
 * but the effect should be measured.
 */
907 908
static inline void set_border_in_hint(struct super_block *s,
				      reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
909
{
910 911
	b_blocknr_t border =
	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
Linus Torvalds's avatar
Linus Torvalds committed
912

913 914 915 916
	if (hint->formatted_node)
		hint->end = border - 1;
	else
		hint->beg = border;
Linus Torvalds's avatar
Linus Torvalds committed
917 918
}

919
static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
920
{
921 922 923 924 925 926 927 928 929 930
	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
		hint->search_start =
		    hint->beg +
		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
			       4) % (hint->end - hint->beg);
	else
		hint->search_start =
		    hint->beg +
		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
			       4) % (hint->end - hint->beg);
Linus Torvalds's avatar
Linus Torvalds committed
931 932
}

933
static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
934
{
935
	char *hash_in;
Linus Torvalds's avatar
Linus Torvalds committed
936

937 938 939 940 941 942
	if (!hint->inode)
		hash_in = (char *)&hint->key.k_dir_id;
	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
	else
		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
Linus Torvalds's avatar
Linus Torvalds committed
943

944 945
	hint->search_start =
	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
Linus Torvalds's avatar
Linus Torvalds committed
946 947
}

948 949 950
static inline int
this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
						   hint)
Linus Torvalds's avatar
Linus Torvalds committed
951
{
952 953
	return hint->block ==
	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
Linus Torvalds's avatar
Linus Torvalds committed
954 955 956
}

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
957
static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
958
{
959
	struct in_core_key *key = &hint->key;
Linus Torvalds's avatar
Linus Torvalds committed
960

961 962 963 964
	hint->th->displace_new_blocks = 0;
	hint->search_start =
	    hint->beg + keyed_hash((char *)(&key->k_objectid),
				   4) % (hint->end - hint->beg);
Linus Torvalds's avatar
Linus Torvalds committed
965
}
966
#endif
Linus Torvalds's avatar
Linus Torvalds committed
967

968
static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
969
{
970 971
	b_blocknr_t border;
	u32 hash_in;
Linus Torvalds's avatar
Linus Torvalds committed
972

973 974 975
	if (hint->formatted_node || hint->inode == NULL) {
		return 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
976

977 978 979 980 981 982
	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
	border =
	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
					 4) % (hint->end - hint->beg - 1);
	if (border > hint->search_start)
		hint->search_start = border;
Linus Torvalds's avatar
Linus Torvalds committed
983

984
	return 1;
Linus Torvalds's avatar
Linus Torvalds committed
985 986
}

987
static inline int old_way(reiserfs_blocknr_hint_t * hint)
Linus Torvalds's avatar
Linus Torvalds committed
988
{
989 990 991 992 993 994 995 996 997 998 999 1000
	b_blocknr_t border;

	if (hint->formatted_node || hint->inode == NULL) {
		return 0;
	}

	border =
	    hint->beg +
	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
							      hint->beg);
	if (border > hint->search_start)
		hint->search_start = border;
Linus Torvalds's avatar
Linus Torvalds committed
1001

1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015
	return 1;
}

static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
{
	struct in_core_key *key = &hint->key;
	b_blocknr_t slice_start;

	slice_start =
	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
	if (slice_start > hint->search_start
	    || slice_start + (hint->end / 100) <= hint->search_start) {
		hint->search_start = slice_start;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1016
}
1017 1018 1019

static void determine_search_start(reiserfs_blocknr_hint_t * hint,
				   int amount_needed)
Linus Torvalds's avatar
Linus Torvalds committed
1020
{
1021 1022
	struct super_block *s = hint->th->t_super;
	int unfm_hint;
Linus Torvalds's avatar
Linus Torvalds committed
1023

1024 1025
	hint->beg = 0;
	hint->end = SB_BLOCK_COUNT(s) - 1;
Linus Torvalds's avatar
Linus Torvalds committed
1026

1027 1028 1029
	/* This is former border algorithm. Now with tunable border offset */
	if (concentrating_formatted_nodes(s))
		set_border_in_hint(s, hint);
Linus Torvalds's avatar
Linus Torvalds committed
1030 1031

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1032 1033 1034 1035 1036
	/*
	 * whenever we create a new directory, we displace it.  At first
	 * we will hash for location, later we might look for a moderately
	 * empty place for it
	 */
1037 1038 1039 1040
	if (displacing_new_packing_localities(s)
	    && hint->th->displace_new_blocks) {
		displace_new_packing_locality(hint);

1041 1042 1043 1044
		/*
		 * we do not continue determine_search_start,
		 * if new packing locality is being displaced
		 */
1045 1046
		return;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1047 1048
#endif

1049 1050 1051 1052
	/*
	 * all persons should feel encouraged to add more special cases
	 * here and test them
	 */
Linus Torvalds's avatar
Linus Torvalds committed
1053

1054 1055 1056 1057 1058
	if (displacing_large_files(s) && !hint->formatted_node
	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
		displace_large_file(hint);
		return;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1059

1060 1061 1062 1063
	/*
	 * if none of our special cases is relevant, use the left
	 * neighbor in the tree order of the new node we are allocating for
	 */
1064 1065 1066 1067
	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
		hash_formatted_node(hint);
		return;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1068

1069 1070
	unfm_hint = get_left_neighbor(hint);

1071 1072 1073 1074 1075 1076 1077
	/*
	 * Mimic old block allocator behaviour, that is if VFS allowed for
	 * preallocation, new blocks are displaced based on directory ID.
	 * Also, if suggested search_start is less than last preallocated
	 * block, we start searching from it, assuming that HDD dataflow
	 * is faster in forward direction
	 */
1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
	if (TEST_OPTION(old_way, s)) {
		if (!hint->formatted_node) {
			if (!reiserfs_hashed_relocation(s))
				old_way(hint);
			else if (!reiserfs_no_unhashed_relocation(s))
				old_hashed_relocation(hint);

			if (hint->inode
			    && hint->search_start <
			    REISERFS_I(hint->inode)->i_prealloc_block)
				hint->search_start =
				    REISERFS_I(hint->inode)->i_prealloc_block;
		}
		return;
Linus Torvalds's avatar
Linus Torvalds committed
1092 1093
	}

1094 1095 1096 1097 1098 1099
	/* This is an approach proposed by Hans */
	if (TEST_OPTION(hundredth_slices, s)
	    && !(displacing_large_files(s) && !hint->formatted_node)) {
		hundredth_slices(hint);
		return;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1100

1101 1102 1103 1104 1105
	/* old_hashed_relocation only works on unformatted */
	if (!unfm_hint && !hint->formatted_node &&
	    TEST_OPTION(old_hashed_relocation, s)) {
		old_hashed_relocation(hint);
	}
1106

1107 1108 1109 1110 1111
	/* new_hashed_relocation works with both formatted/unformatted nodes */
	if ((!unfm_hint || hint->formatted_node) &&
	    TEST_OPTION(new_hashed_relocation, s)) {
		new_hashed_relocation(hint);
	}
1112

1113 1114 1115 1116
	/* dirid grouping works only on unformatted nodes */
	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
		dirid_groups(hint);
	}
Linus Torvalds's avatar
Linus Torvalds committed
1117
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1118 1119 1120
	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
		dirid_groups(hint);
	}
Linus Torvalds's avatar
Linus Torvalds committed
1121 1122
#endif

1123 1124 1125 1126 1127
	/* oid grouping works only on unformatted nodes */
	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
		oid_groups(hint);
	}
	return;
Linus Torvalds's avatar
Linus Torvalds committed
1128 1129 1130 1131
}

static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
{
1132 1133 1134 1135 1136 1137 1138
	/* make minimum size a mount option and benchmark both ways */
	/* we preallocate blocks only for regular files, specific size */
	/* benchmark preallocating always and see what happens */

	hint->prealloc_size = 0;

	if (!hint->formatted_node && hint->preallocate) {
1139
		if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
1140 1141 1142 1143 1144 1145 1146 1147
		    && hint->inode->i_size >=
		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
		    preallocmin * hint->inode->i_sb->s_blocksize)
			hint->prealloc_size =
			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
			    preallocsize - 1;
	}
	return CARRY_ON;
Linus Torvalds's avatar
Linus Torvalds committed
1148 1149
}

1150 1151 1152 1153 1154 1155
static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
						 b_blocknr_t * new_blocknrs,
						 b_blocknr_t start,
						 b_blocknr_t finish, int min,
						 int amount_needed,
						 int prealloc_size)
Linus Torvalds's avatar
Linus Torvalds committed
1156
{
1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173
	int rest = amount_needed;
	int nr_allocated;

	while (rest > 0 && start <= finish) {
		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
					   rest + prealloc_size,
					   !hint->formatted_node, hint->block);

		if (nr_allocated == 0)	/* no new blocks allocated, return */
			break;

		/* fill free_blocknrs array first */
		while (rest > 0 && nr_allocated > 0) {
			*new_blocknrs++ = start++;
			rest--;
			nr_allocated--;
		}
Linus Torvalds's avatar
Linus Torvalds committed
1174

1175 1176
		/* do we have something to fill prealloc. array also ? */
		if (nr_allocated > 0) {
1177 1178 1179 1180
			/*
			 * it means prealloc_size was greater that 0 and
			 * we do preallocation
			 */
1181 1182 1183 1184 1185 1186 1187 1188
			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
				 &SB_JOURNAL(hint->th->t_super)->
				 j_prealloc_list);
			REISERFS_I(hint->inode)->i_prealloc_block = start;
			REISERFS_I(hint->inode)->i_prealloc_count =
			    nr_allocated;
			break;
		}
Linus Torvalds's avatar
Linus Torvalds committed
1189 1190
	}

1191
	return (amount_needed - rest);
Linus Torvalds's avatar
Linus Torvalds committed
1192 1193 1194
}

static inline int blocknrs_and_prealloc_arrays_from_search_start
1195 1196 1197 1198 1199 1200 1201
    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
     int amount_needed) {
	struct super_block *s = hint->th->t_super;
	b_blocknr_t start = hint->search_start;
	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
	int passno = 0;
	int nr_allocated = 0;
1202
	int depth;
1203 1204 1205 1206

	determine_prealloc_size(hint);
	if (!hint->formatted_node) {
		int quota_ret;
Linus Torvalds's avatar
Linus Torvalds committed
1207
#ifdef REISERQUOTA_DEBUG
1208 1209 1210
		reiserfs_debug(s, REISERFS_DEBUG_CODE,
			       "reiserquota: allocating %d blocks id=%u",
			       amount_needed, hint->inode->i_uid);
Linus Torvalds's avatar
Linus Torvalds committed
1211
#endif
1212
		depth = reiserfs_write_unlock_nested(s);
1213
		quota_ret =
1214
		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
1215 1216
		if (quota_ret) {	/* Quota exceeded? */
			reiserfs_write_lock_nested(s, depth);
1217
			return QUOTA_EXCEEDED;
1218
		}
1219
		if (hint->preallocate && hint->prealloc_size) {
Linus Torvalds's avatar
Linus Torvalds committed
1220
#ifdef REISERQUOTA_DEBUG
1221 1222 1223
			reiserfs_debug(s, REISERFS_DEBUG_CODE,
				       "reiserquota: allocating (prealloc) %d blocks id=%u",
				       hint->prealloc_size, hint->inode->i_uid);
Linus Torvalds's avatar
Linus Torvalds committed
1224
#endif
1225
			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
1226 1227 1228 1229 1230
							 hint->prealloc_size);
			if (quota_ret)
				hint->preallocate = hint->prealloc_size = 0;
		}
		/* for unformatted nodes, force large allocations */
1231
		reiserfs_write_lock_nested(s, depth);
Linus Torvalds's avatar
Linus Torvalds committed
1232 1233
	}

1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
	do {
		switch (passno++) {
		case 0:	/* Search from hint->search_start to end of disk */
			start = hint->search_start;
			finish = SB_BLOCK_COUNT(s) - 1;
			break;
		case 1:	/* Search from hint->beg to hint->search_start */
			start = hint->beg;
			finish = hint->search_start;
			break;
		case 2:	/* Last chance: Search from 0 to hint->beg */
			start = 0;
			finish = hint->beg;
			break;
1248 1249
		default:
			/* We've tried searching everywhere, not enough space */
1250 1251
			/* Free the blocks */
			if (!hint->formatted_node) {
Linus Torvalds's avatar
Linus Torvalds committed
1252
#ifdef REISERQUOTA_DEBUG
1253 1254 1255 1256 1257 1258
				reiserfs_debug(s, REISERFS_DEBUG_CODE,
					       "reiserquota: freeing (nospace) %d blocks id=%u",
					       amount_needed +
					       hint->prealloc_size -
					       nr_allocated,
					       hint->inode->i_uid);
Linus Torvalds's avatar
Linus Torvalds committed
1259
#endif
1260
				/* Free not allocated blocks */
1261
				depth = reiserfs_write_unlock_nested(s);
1262
				dquot_free_block_nodirty(hint->inode,
1263 1264
					amount_needed + hint->prealloc_size -
					nr_allocated);
1265
				reiserfs_write_lock_nested(s, depth);
1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277
			}
			while (nr_allocated--)
				reiserfs_free_block(hint->th, hint->inode,
						    new_blocknrs[nr_allocated],
						    !hint->formatted_node);

			return NO_DISK_SPACE;
		}
	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
								 new_blocknrs +
								 nr_allocated,
								 start, finish,
1278
								 1,
1279 1280 1281 1282 1283 1284 1285 1286 1287
								 amount_needed -
								 nr_allocated,
								 hint->
								 prealloc_size))
		 < amount_needed);
	if (!hint->formatted_node &&
	    amount_needed + hint->prealloc_size >
	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
		/* Some of preallocation blocks were not allocated */
Linus Torvalds's avatar
Linus Torvalds committed
1288
#ifdef REISERQUOTA_DEBUG
1289 1290 1291 1292 1293 1294
		reiserfs_debug(s, REISERFS_DEBUG_CODE,
			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
			       amount_needed + hint->prealloc_size -
			       nr_allocated -
			       REISERFS_I(hint->inode)->i_prealloc_count,
			       hint->inode->i_uid);
Linus Torvalds's avatar
Linus Torvalds committed
1295
#endif
1296 1297

		depth = reiserfs_write_unlock_nested(s);
1298
		dquot_free_block_nodirty(hint->inode, amount_needed +
1299 1300 1301
					 hint->prealloc_size - nr_allocated -
					 REISERFS_I(hint->inode)->
					 i_prealloc_count);
1302
		reiserfs_write_lock_nested(s, depth);
1303
	}
Linus Torvalds's avatar
Linus Torvalds committed
1304

1305
	return CARRY_ON;
Linus Torvalds's avatar
Linus Torvalds committed
1306 1307 1308 1309
}

/* grab new blocknrs from preallocated list */
/* return amount still needed after using them */
1310 1311 1312
static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
					      b_blocknr_t * new_blocknrs,
					      int amount_needed)
Linus Torvalds's avatar
Linus Torvalds committed
1313
{
1314
	struct inode *inode = hint->inode;
Linus Torvalds's avatar
Linus Torvalds committed
1315

1316 1317
	if (REISERFS_I(inode)->i_prealloc_count > 0) {
		while (amount_needed) {
Linus Torvalds's avatar
Linus Torvalds committed
1318

1319 1320
			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
			REISERFS_I(inode)->i_prealloc_count--;
Linus Torvalds's avatar
Linus Torvalds committed
1321

1322
			amount_needed--;
Linus Torvalds's avatar
Linus Torvalds committed
1323

1324 1325 1326 1327 1328
			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
				list_del(&REISERFS_I(inode)->i_prealloc_list);
				break;
			}
		}
Linus Torvalds's avatar
Linus Torvalds committed
1329
	}
1330 1331
	/* return amount still needed after using preallocated blocks */
	return amount_needed;
Linus Torvalds's avatar
Linus Torvalds committed
1332 1333
}

1334 1335 1336 1337 1338
int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
			       b_blocknr_t *new_blocknrs,
			       int amount_needed,
			       /* Amount of blocks we have already reserved */
			       int reserved_by_us)
Linus Torvalds's avatar
Linus Torvalds committed
1339
{
1340 1341 1342 1343 1344 1345 1346 1347 1348 1349
	int initial_amount_needed = amount_needed;
	int ret;
	struct super_block *s = hint->th->t_super;

	/* Check if there is enough space, taking into account reserved space */
	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
	    amount_needed - reserved_by_us)
		return NO_DISK_SPACE;
	/* should this be if !hint->inode &&  hint->preallocate? */
	/* do you mean hint->formatted_node can be removed ? - Zam */
1350 1351 1352 1353 1354
	/*
	 * hint->formatted_node cannot be removed because we try to access
	 * inode information here, and there is often no inode associated with
	 * metadata allocations - green
	 */
1355 1356 1357 1358

	if (!hint->formatted_node && hint->preallocate) {
		amount_needed = use_preallocated_list_if_available
		    (hint, new_blocknrs, amount_needed);
1359 1360 1361 1362 1363 1364

		/*
		 * We have all the block numbers we need from the
		 * prealloc list
		 */
		if (amount_needed == 0)
1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375
			return CARRY_ON;
		new_blocknrs += (initial_amount_needed - amount_needed);
	}

	/* find search start and save it in hint structure */
	determine_search_start(hint, amount_needed);
	if (hint->search_start >= SB_BLOCK_COUNT(s))
		hint->search_start = SB_BLOCK_COUNT(s) - 1;

	/* allocation itself; fill new_blocknrs and preallocation arrays */
	ret = blocknrs_and_prealloc_arrays_from_search_start
Linus Torvalds's avatar
Linus Torvalds committed
1376
	    (hint, new_blocknrs, amount_needed);
1377

1378 1379 1380 1381 1382 1383
	/*
	 * We used prealloc. list to fill (partially) new_blocknrs array.
	 * If final allocation fails we need to return blocks back to
	 * prealloc. list or just free them. -- Zam (I chose second
	 * variant)
	 */
1384 1385 1386 1387 1388
	if (ret != CARRY_ON) {
		while (amount_needed++ < initial_amount_needed) {
			reiserfs_free_block(hint->th, hint->inode,
					    *(--new_blocknrs), 1);
		}
Linus Torvalds's avatar
Linus Torvalds committed
1389
	}
1390
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
1391 1392
}

1393 1394 1395 1396 1397 1398
void reiserfs_cache_bitmap_metadata(struct super_block *sb,
                                    struct buffer_head *bh,
                                    struct reiserfs_bitmap_info *info)
{
	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);

1399
	/* The first bit must ALWAYS be 1 */
1400 1401 1402
	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
			       "corrupted: first bit must be 1", bh->b_blocknr);
1403 1404

	info->free_count = 0;
Jeff Mahoney's avatar