journal.c 120 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
Linus Torvalds's avatar
Linus Torvalds committed
2
/*
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
 * Write ahead logging implementation copyright Chris Mason 2000
 *
 * The background commits make this code very interrelated, and
 * overly complex.  I need to rethink things a bit....The major players:
 *
 * journal_begin -- call with the number of blocks you expect to log.
 *                  If the current transaction is too
 *		    old, it will block until the current transaction is
 *		    finished, and then start a new one.
 *		    Usually, your transaction will get joined in with
 *                  previous ones for speed.
 *
 * journal_join  -- same as journal_begin, but won't block on the current
 *                  transaction regardless of age.  Don't ever call
 *                  this.  Ever.  There are only two places it should be
 *                  called from, and they are both inside this file.
 *
 * journal_mark_dirty -- adds blocks into this transaction.  clears any flags
 *                       that might make them get sent to disk
 *                       and then marks them BH_JDirty.  Puts the buffer head
 *                       into the current transaction hash.
 *
 * journal_end -- if the current transaction is batchable, it does nothing
 *                   otherwise, it could do an async/synchronous commit, or
 *                   a full flush of all log and real blocks in the
 *                   transaction.
 *
 * flush_old_commits -- if the current transaction is too old, it is ended and
 *                      commit blocks are sent to disk.  Forces commit blocks
 *                      to disk for all backgrounded commits that have been
 *                      around too long.
 *		     -- Note, if you call this as an immediate flush
 *		        from within kupdate, it will ignore the immediate flag
 */
Linus Torvalds's avatar
Linus Torvalds committed
37 38

#include <linux/time.h>
39
#include <linux/semaphore.h>
Linus Torvalds's avatar
Linus Torvalds committed
40
#include <linux/vmalloc.h>
41
#include "reiserfs.h"
Linus Torvalds's avatar
Linus Torvalds committed
42 43 44 45 46 47 48 49 50
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
51
#include <linux/backing-dev.h>
52
#include <linux/uaccess.h>
53
#include <linux/slab.h>
54

Linus Torvalds's avatar
Linus Torvalds committed
55 56 57 58 59 60 61

/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
                               j_list))
#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
                               j_working_list))

62 63
/* must be correct to keep the desc and commit structs at 4k */
#define JOURNAL_TRANS_HALF 1018
64
#define BUFNR 64		/*read ahead */
Linus Torvalds's avatar
Linus Torvalds committed
65 66 67

/* cnode stat bits.  Move these into reiserfs_fs.h */

68 69 70 71
/* this block was freed, and can't be written.  */
#define BLOCK_FREED 2
/* this block was freed during this transaction, and can't be written */
#define BLOCK_FREED_HOLDER 3
Linus Torvalds's avatar
Linus Torvalds committed
72

73 74
/* used in flush_journal_list */
#define BLOCK_NEEDS_FLUSH 4
Linus Torvalds's avatar
Linus Torvalds committed
75 76 77 78 79
#define BLOCK_DIRTIED 5

/* journal list state bits */
#define LIST_TOUCHED 1
#define LIST_DIRTY   2
80
#define LIST_COMMIT_PENDING  4	/* someone will commit this list */
Linus Torvalds's avatar
Linus Torvalds committed
81 82 83 84

/* flags for do_journal_end */
#define FLUSH_ALL   1		/* flush commit and real blocks */
#define COMMIT_NOW  2		/* end and commit this transaction */
85 86
#define WAIT        4		/* wait for the log blocks to hit the disk */

87
static int do_journal_end(struct reiserfs_transaction_handle *, int flags);
88 89 90 91 92 93
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl, int flushall);
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl, int flushall);
static int can_dirty(struct reiserfs_journal_cnode *cn);
static int journal_join(struct reiserfs_transaction_handle *th,
94
			struct super_block *sb);
Al Viro's avatar
Al Viro committed
95
static void release_journal_dev(struct super_block *super,
96
			       struct reiserfs_journal *journal);
Linus Torvalds's avatar
Linus Torvalds committed
97
static int dirty_one_transaction(struct super_block *s,
98
				 struct reiserfs_journal_list *jl);
99
static void flush_async_commits(struct work_struct *work);
Linus Torvalds's avatar
Linus Torvalds committed
100 101 102 103
static void queue_log_writer(struct super_block *s);

/* values for join in do_journal_begin_r */
enum {
104
	JBEGIN_REG = 0,		/* regular journal begin */
105 106 107 108
	/* join the running transaction if at all possible */
	JBEGIN_JOIN = 1,
	/* called from cleanup code, ignores aborted flag */
	JBEGIN_ABORT = 2,
Linus Torvalds's avatar
Linus Torvalds committed
109 110 111
};

static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
112
			      struct super_block *sb,
113
			      unsigned long nblocks, int join);
Linus Torvalds's avatar
Linus Torvalds committed
114

115
static void init_journal_hash(struct super_block *sb)
116
{
117
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
118 119
	memset(journal->j_hash_table, 0,
	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
Linus Torvalds's avatar
Linus Torvalds committed
120 121 122
}

/*
 * clears BH_Dirty and the journal-test bit.  Called because
 * I can't allow refile_buffer to make schedule happen after I've freed a
 * block.  Look at remove_from_transaction and journal_mark_freed for
 * more details.
 */
static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
{
	if (!bh)
		return 0;
	clear_buffer_dirty(bh);
	clear_buffer_journal_test(bh);
	return 0;
}

137
static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
138
							 *sb)
139 140 141 142
{
	struct reiserfs_bitmap_node *bn;
	static int id;

143
	bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
144 145 146
	if (!bn) {
		return NULL;
	}
147
	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
148
	if (!bn->data) {
149
		kfree(bn);
150 151 152 153 154 155 156
		return NULL;
	}
	bn->id = id++;
	INIT_LIST_HEAD(&bn->list);
	return bn;
}

157
static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
158
{
159
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
160 161 162 163
	struct reiserfs_bitmap_node *bn = NULL;
	struct list_head *entry = journal->j_bitmap_nodes.next;

	journal->j_used_bitmap_nodes++;
164
repeat:
165 166 167 168

	if (entry != &journal->j_bitmap_nodes) {
		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
		list_del(entry);
169
		memset(bn->data, 0, sb->s_blocksize);
170 171 172
		journal->j_free_bitmap_nodes--;
		return bn;
	}
173
	bn = allocate_bitmap_node(sb);
174 175 176 177 178
	if (!bn) {
		yield();
		goto repeat;
	}
	return bn;
Linus Torvalds's avatar
Linus Torvalds committed
179
}
180
static inline void free_bitmap_node(struct super_block *sb,
181 182
				    struct reiserfs_bitmap_node *bn)
{
183
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
184 185
	journal->j_used_bitmap_nodes--;
	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
186 187
		kfree(bn->data);
		kfree(bn);
188 189 190 191 192 193
	} else {
		list_add(&bn->list, &journal->j_bitmap_nodes);
		journal->j_free_bitmap_nodes++;
	}
}

194
static void allocate_bitmap_nodes(struct super_block *sb)
195 196
{
	int i;
197
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
198 199
	struct reiserfs_bitmap_node *bn = NULL;
	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
200
		bn = allocate_bitmap_node(sb);
201 202 203 204
		if (bn) {
			list_add(&bn->list, &journal->j_bitmap_nodes);
			journal->j_free_bitmap_nodes++;
		} else {
205 206
			/* this is ok, we'll try again when more are needed */
			break;
207 208
		}
	}
Linus Torvalds's avatar
Linus Torvalds committed
209 210
}

211
static int set_bit_in_list_bitmap(struct super_block *sb,
212
				  b_blocknr_t block,
213 214
				  struct reiserfs_list_bitmap *jb)
{
215 216
	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
	unsigned int bit_nr = block % (sb->s_blocksize << 3);
Linus Torvalds's avatar
Linus Torvalds committed
217

218
	if (!jb->bitmaps[bmap_nr]) {
219
		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
220 221 222
	}
	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
223 224
}

225
static void cleanup_bitmap_list(struct super_block *sb,
226 227 228 229 230 231
				struct reiserfs_list_bitmap *jb)
{
	int i;
	if (jb->bitmaps == NULL)
		return;

232
	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
233
		if (jb->bitmaps[i]) {
234
			free_bitmap_node(sb, jb->bitmaps[i]);
235 236 237
			jb->bitmaps[i] = NULL;
		}
	}
Linus Torvalds's avatar
Linus Torvalds committed
238 239 240
}

/*
241 242
 * only call this on FS unmount.
 */
243
static int free_list_bitmaps(struct super_block *sb,
244 245 246 247 248 249 250
			     struct reiserfs_list_bitmap *jb_array)
{
	int i;
	struct reiserfs_list_bitmap *jb;
	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
		jb = jb_array + i;
		jb->journal_list = NULL;
251
		cleanup_bitmap_list(sb, jb);
252 253 254 255 256 257
		vfree(jb->bitmaps);
		jb->bitmaps = NULL;
	}
	return 0;
}

258
static int free_bitmap_nodes(struct super_block *sb)
259
{
260
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
261 262 263 264 265 266
	struct list_head *next = journal->j_bitmap_nodes.next;
	struct reiserfs_bitmap_node *bn;

	while (next != &journal->j_bitmap_nodes) {
		bn = list_entry(next, struct reiserfs_bitmap_node, list);
		list_del(next);
267 268
		kfree(bn->data);
		kfree(bn);
269 270 271 272 273
		next = journal->j_bitmap_nodes.next;
		journal->j_free_bitmap_nodes--;
	}

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
274 275 276
}

/*
277 278 279
 * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
 * jb_array is the array to be filled in.
 */
280
int reiserfs_allocate_list_bitmaps(struct super_block *sb,
281
				   struct reiserfs_list_bitmap *jb_array,
282
				   unsigned int bmap_nr)
283 284 285 286 287 288 289 290 291
{
	int i;
	int failed = 0;
	struct reiserfs_list_bitmap *jb;
	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);

	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
		jb = jb_array + i;
		jb->journal_list = NULL;
292
		jb->bitmaps = vzalloc(mem);
293
		if (!jb->bitmaps) {
294
			reiserfs_warning(sb, "clm-2000", "unable to "
295
					 "allocate bitmaps for journal lists");
296 297 298 299 300
			failed = 1;
			break;
		}
	}
	if (failed) {
301
		free_list_bitmaps(sb, jb_array);
302 303 304
		return -1;
	}
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
305 306 307
}

/*
 * find an available list bitmap.  If you can't find one, flush a commit list
 * and try again
 */
static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
						    struct reiserfs_journal_list
						    *jl)
{
	int i, j;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct reiserfs_list_bitmap *jb = NULL;

	/*
	 * Round-robin over the bitmap pool, at most three full passes.
	 * A slot still owned by a journal list gets that list's commit
	 * flushed, which should release the slot.
	 */
	for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
		i = journal->j_list_bitmap_index;
		journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
		jb = journal->j_list_bitmap + i;
		if (journal->j_list_bitmap[i].journal_list) {
			flush_commit_list(sb,
					  journal->j_list_bitmap[i].
					  journal_list, 1);
			/* re-check: the flush may have released the slot */
			if (!journal->j_list_bitmap[i].journal_list) {
				break;
			}
		} else {
			break;
		}
	}
	/* double check to make sure if flushed correctly */
	if (jb->journal_list)
		return NULL;
	jb->journal_list = jl;
	return jb;
}

341
/*
342 343 344 345
 * allocates a new chunk of X nodes, and links them all together as a list.
 * Uses the cnode->next and cnode->prev pointers
 * returns NULL on failure
 */
346 347 348 349 350 351 352
static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
{
	struct reiserfs_journal_cnode *head;
	int i;
	if (num_cnodes <= 0) {
		return NULL;
	}
353
	head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
354 355 356 357 358 359 360 361 362 363 364
	if (!head) {
		return NULL;
	}
	head[0].prev = NULL;
	head[0].next = head + 1;
	for (i = 1; i < num_cnodes; i++) {
		head[i].prev = head + (i - 1);
		head[i].next = head + (i + 1);	/* if last one, overwrite it after the if */
	}
	head[num_cnodes - 1].next = NULL;
	return head;
Linus Torvalds's avatar
Linus Torvalds committed
365 366
}

367
/* pulls a cnode off the free list, or returns NULL on failure */
368
static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
369 370
{
	struct reiserfs_journal_cnode *cn;
371
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
372

373
	reiserfs_check_lock_depth(sb, "get_cnode");
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389

	if (journal->j_cnode_free <= 0) {
		return NULL;
	}
	journal->j_cnode_used++;
	journal->j_cnode_free--;
	cn = journal->j_cnode_free_list;
	if (!cn) {
		return cn;
	}
	if (cn->next) {
		cn->next->prev = NULL;
	}
	journal->j_cnode_free_list = cn->next;
	memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
	return cn;
Linus Torvalds's avatar
Linus Torvalds committed
390 391 392
}

/*
393 394
 * returns a cnode to the free list
 */
395
static void free_cnode(struct super_block *sb,
396 397
		       struct reiserfs_journal_cnode *cn)
{
398
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
Linus Torvalds's avatar
Linus Torvalds committed
399

400
	reiserfs_check_lock_depth(sb, "free_cnode");
Linus Torvalds's avatar
Linus Torvalds committed
401

402 403 404 405 406 407 408 409 410
	journal->j_cnode_used--;
	journal->j_cnode_free++;
	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
	cn->next = journal->j_cnode_free_list;
	if (journal->j_cnode_free_list) {
		journal->j_cnode_free_list->prev = cn;
	}
	cn->prev = NULL;	/* not needed with the memset, but I might kill the memset, and forget to do this */
	journal->j_cnode_free_list = cn;
Linus Torvalds's avatar
Linus Torvalds committed
411 412
}

413 414 415 416
/* drop the journal-prepared and restore-dirty state bits from a buffer */
static void clear_prepared_bits(struct buffer_head *bh)
{
	clear_buffer_journal_prepared(bh);
	clear_buffer_journal_restore_dirty(bh);
}

419 420 421 422
/*
 * return a cnode with same dev, block number and size in table,
 * or null if not found
 */
423 424 425 426 427 428 429
static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
								  super_block
								  *sb,
								  struct
								  reiserfs_journal_cnode
								  **table,
								  long bl)
Linus Torvalds's avatar
Linus Torvalds committed
430
{
431 432 433 434 435 436 437 438
	struct reiserfs_journal_cnode *cn;
	cn = journal_hash(table, sb, bl);
	while (cn) {
		if (cn->blocknr == bl && cn->sb == sb)
			return cn;
		cn = cn->hnext;
	}
	return (struct reiserfs_journal_cnode *)0;
Linus Torvalds's avatar
Linus Torvalds committed
439 440 441
}

/*
 * this actually means 'can this block be reallocated yet?'.  If you set
 * search_all, a block can only be allocated if it is not in the current
 * transaction, was not freed by the current transaction, and has no chance
 * of ever being overwritten by a replay after crashing.
 *
 * If you don't set search_all, a block can only be allocated if it is not
 * in the current transaction.  Since deleting a block removes it from the
 * current transaction, this case should never happen.  If you don't set
 * search_all, make sure you never write the block without logging it.
 *
 * next_zero_bit is a suggestion about the next block to try for find_forward.
 * when bl is rejected because it is set in a journal list bitmap, we search
 * for the next zero bit in the bitmap that rejected bl.  Then, we return
 * that through next_zero_bit for find_forward to try.
 *
 * Just because we return something in next_zero_bit does not mean we won't
 * reject it on the next call to reiserfs_in_journal
 */
int reiserfs_in_journal(struct super_block *sb,
			unsigned int bmap_nr, int bit_nr, int search_all,
			b_blocknr_t * next_zero_bit)
{
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	struct reiserfs_journal_cnode *cn;
	struct reiserfs_list_bitmap *jb;
	int i;
	unsigned long bl;

	*next_zero_bit = 0;	/* always start this at zero. */

	PROC_INFO_INC(sb, journal.in_journal);
	/*
	 * If we aren't doing a search_all, this is a metablock, and it
	 * will be logged before use.  if we crash before the transaction
	 * that freed it commits,  this transaction won't have committed
	 * either, and the block will never be written
	 */
	if (search_all) {
		/* check every journal list's freed-block bitmap */
		for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
			PROC_INFO_INC(sb, journal.in_journal_bitmap);
			jb = journal->j_list_bitmap + i;
			if (jb->journal_list && jb->bitmaps[bmap_nr] &&
			    test_bit(bit_nr,
				     (unsigned long *)jb->bitmaps[bmap_nr]->
				     data)) {
				/* rejected: suggest the next candidate bit */
				*next_zero_bit =
				    find_next_zero_bit((unsigned long *)
						       (jb->bitmaps[bmap_nr]->
							data),
						       sb->s_blocksize << 3,
						       bit_nr + 1);
				return 1;
			}
		}
	}

	/* absolute block number from bitmap index and bit offset */
	bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
	/* is it in any old transactions? */
	if (search_all
	    && (cn =
		get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
		return 1;
	}

	/* is it in the current transaction.  This should never happen */
	if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
		BUG();
		return 1;
	}

	PROC_INFO_INC(sb, journal.in_journal_reusable);
	/* safe for reuse */
	return 0;
}

517
/* insert cn into table */
518 519 520 521
static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
				       struct reiserfs_journal_cnode *cn)
{
	struct reiserfs_journal_cnode *cn_orig;
Linus Torvalds's avatar
Linus Torvalds committed
522

523 524 525 526 527 528 529
	cn_orig = journal_hash(table, cn->sb, cn->blocknr);
	cn->hnext = cn_orig;
	cn->hprev = NULL;
	if (cn_orig) {
		cn_orig->hprev = cn;
	}
	journal_hash(table, cn->sb, cn->blocknr) = cn;
Linus Torvalds's avatar
Linus Torvalds committed
530 531 532
}

/* lock the current transaction */
533
static inline void lock_journal(struct super_block *sb)
534
{
535
	PROC_INFO_INC(sb, journal.lock_journal);
536 537

	reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
Linus Torvalds's avatar
Linus Torvalds committed
538 539 540
}

/* unlock the current transaction */
541
static inline void unlock_journal(struct super_block *sb)
542
{
543
	mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
Linus Torvalds's avatar
Linus Torvalds committed
544 545 546 547
}

/* take a reference on a journal list; dropped via put_journal_list */
static inline void get_journal_list(struct reiserfs_journal_list *jl)
{
	jl->j_refcount++;
}

static inline void put_journal_list(struct super_block *s,
552
				    struct reiserfs_journal_list *jl)
Linus Torvalds's avatar
Linus Torvalds committed
553
{
554
	if (jl->j_refcount < 1) {
555
		reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
556 557 558
			       jl->j_trans_id, jl->j_refcount);
	}
	if (--jl->j_refcount == 0)
559
		kfree(jl);
Linus Torvalds's avatar
Linus Torvalds committed
560 561 562
}

/*
563 564 565 566
 * this used to be much more involved, and I'm keeping it just in case
 * things get ugly again.  it gets called by flush_commit_list, and
 * cleans up any data stored about blocks freed during a transaction.
 */
567
static void cleanup_freed_for_journal_list(struct super_block *sb,
568 569
					   struct reiserfs_journal_list *jl)
{
Linus Torvalds's avatar
Linus Torvalds committed
570

571 572
	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
	if (jb) {
573
		cleanup_bitmap_list(sb, jb);
574 575 576
	}
	jl->j_list_bitmap->journal_list = NULL;
	jl->j_list_bitmap = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
577 578 579
}

static int journal_list_still_alive(struct super_block *s,
580
				    unsigned int trans_id)
581 582 583 584 585 586 587 588 589 590 591 592 593 594
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	struct list_head *entry = &journal->j_journal_list;
	struct reiserfs_journal_list *jl;

	if (!list_empty(entry)) {
		jl = JOURNAL_LIST_ENTRY(entry->next);
		if (jl->j_trans_id <= trans_id) {
			return 1;
		}
	}
	return 0;
}

595 596 597 598 599 600 601 602
/*
 * If page->mapping was null, we failed to truncate this page for
 * some reason.  Most likely because it was truncated after being
 * logged via data=journal.
 *
 * This does a check to see if the buffer belongs to one of these
 * lost pages before doing the final put_bh.  If page->mapping was
 * null, it tries to free buffers on the page, which should make the
 * final put_page drop the page from the lru.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page = bh->b_page;
	if (!page->mapping && trylock_page(page)) {
		/* hold the page across put_bh so it can't vanish under us */
		get_page(page);
		put_bh(bh);
		/* re-check mapping now that we hold the page lock */
		if (!page->mapping)
			try_to_free_buffers(page);
		unlock_page(page);
		put_page(page);
	} else {
		put_bh(bh);
	}
}

620 621 622
/*
 * I/O completion handler for logged buffers: records uptodate state,
 * unlocks the buffer, and drops our reference via release_buffer_page.
 * A buffer still flagged journaled should never reach disk this way,
 * hence the warning.
 */
static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	if (buffer_journaled(bh)) {
		reiserfs_warning(NULL, "clm-2084",
				 "pinned buffer %lu:%pg sent to disk",
				 bh->b_blocknr, bh->b_bdev);
	}
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);

	unlock_buffer(bh);
	release_buffer_page(bh);
}

/* completion handler for ordered-data buffers: record state and release */
static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);
	unlock_buffer(bh);
	put_bh(bh);
}

/*
 * Submit a logged buffer for writeback.  The buffer must be uptodate
 * and carry the journal-test bit; both are enforced with BUG().
 * Takes a reference that reiserfs_end_buffer_io_sync drops.
 */
static void submit_logged_buffer(struct buffer_head *bh)
{
	get_bh(bh);
	bh->b_end_io = reiserfs_end_buffer_io_sync;
	clear_buffer_journal_new(bh);
	clear_buffer_dirty(bh);
	if (!test_clear_buffer_journal_test(bh))
		BUG();
	if (!buffer_uptodate(bh))
		BUG();
	submit_bh(REQ_OP_WRITE, 0, bh);
}

/*
 * Submit an ordered-data buffer for writeback; must be uptodate.
 * Takes a reference that reiserfs_end_ordered_io drops.
 */
static void submit_ordered_buffer(struct buffer_head *bh)
{
	get_bh(bh);
	bh->b_end_io = reiserfs_end_ordered_io;
	clear_buffer_dirty(bh);
	if (!buffer_uptodate(bh))
		BUG();
	submit_bh(REQ_OP_WRITE, 0, bh);
}

Linus Torvalds's avatar
Linus Torvalds committed
669 670
/* batch of buffer heads submitted to disk together */
#define CHUNK_SIZE 32
struct buffer_chunk {
	struct buffer_head *bh[CHUNK_SIZE];	/* pending buffers */
	int nr;					/* number currently queued */
};

675 676 677 678 679 680 681
/* submit every logged buffer collected in the chunk, then reset it */
static void write_chunk(struct buffer_chunk *chunk)
{
	int idx;

	for (idx = 0; idx < chunk->nr; idx++)
		submit_logged_buffer(chunk->bh[idx]);
	chunk->nr = 0;
}

684 685 686 687 688 689 690
/* submit every ordered buffer collected in the chunk, then reset it */
static void write_ordered_chunk(struct buffer_chunk *chunk)
{
	int idx;

	for (idx = 0; idx < chunk->nr; idx++)
		submit_ordered_buffer(chunk->bh[idx]);
	chunk->nr = 0;
}

static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
694
			spinlock_t * lock, void (fn) (struct buffer_chunk *))
Linus Torvalds's avatar
Linus Torvalds committed
695
{
696
	int ret = 0;
697
	BUG_ON(chunk->nr >= CHUNK_SIZE);
698 699 700
	chunk->bh[chunk->nr++] = bh;
	if (chunk->nr >= CHUNK_SIZE) {
		ret = 1;
701
		if (lock) {
702
			spin_unlock(lock);
703
			fn(chunk);
704
			spin_lock(lock);
705 706 707
		} else {
			fn(chunk);
		}
708 709
	}
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
710 711 712
}

static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
713 714 715 716 717 718 719 720 721 722
/*
 * Allocate a reiserfs_jh, yielding and retrying until kmalloc succeeds;
 * never returns NULL.  Keeps the global nr_reiserfs_jh count in sync.
 */
static struct reiserfs_jh *alloc_jh(void)
{
	struct reiserfs_jh *jh;

	for (;;) {
		jh = kmalloc(sizeof(*jh), GFP_NOFS);
		if (jh)
			break;
		yield();
	}
	atomic_inc(&nr_reiserfs_jh);
	return jh;
}

/*
 * we want to free the jh when the buffer has been written
 * and waited on
 */
730 731 732 733 734 735 736 737 738 739 740 741 742 743 744
void reiserfs_free_jh(struct buffer_head *bh)
{
	struct reiserfs_jh *jh;

	jh = bh->b_private;
	if (jh) {
		bh->b_private = NULL;
		jh->bh = NULL;
		list_del_init(&jh->list);
		kfree(jh);
		if (atomic_read(&nr_reiserfs_jh) <= 0)
			BUG();
		atomic_dec(&nr_reiserfs_jh);
		put_bh(bh);
	}
Linus Torvalds's avatar
Linus Torvalds committed
745 746 747
}

/*
 * Attach a journal head to bh and queue it on the current journal
 * list's tail list (tail != 0) or ordered bh list.  An existing jh is
 * re-queued; otherwise one is allocated.  Always returns 0.
 */
static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
			   int tail)
{
	struct reiserfs_jh *jh;

	if (bh->b_private) {
		spin_lock(&j->j_dirty_buffers_lock);
		/*
		 * b_private was checked without the lock; re-check under
		 * it — it may have been freed in the meantime
		 */
		if (!bh->b_private) {
			spin_unlock(&j->j_dirty_buffers_lock);
			goto no_jh;
		}
		jh = bh->b_private;
		list_del_init(&jh->list);
	} else {
no_jh:
		get_bh(bh);
		jh = alloc_jh();
		spin_lock(&j->j_dirty_buffers_lock);
		/*
		 * buffer must be locked for __add_jh, should be able to have
		 * two adds at the same time
		 */
		BUG_ON(bh->b_private);
		jh->bh = bh;
		bh->b_private = jh;
	}
	jh->jl = j->j_current_jl;
	if (tail)
		list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
	else {
		list_add_tail(&jh->list, &jh->jl->j_bh_list);
	}
	spin_unlock(&j->j_dirty_buffers_lock);
	return 0;
}

783 784 785
/* queue bh on the current journal list's tail-buffer list */
int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
{
	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
}
787 788 789
/* queue bh on the current journal list's ordered-buffer list */
int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
{
	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
}

#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
793
/*
 * Write out every ordered buffer on `list` in chunks, then wait for the
 * writes to finish.  Called with `lock` NOT held; it is taken here and
 * dropped around blocking operations.  Returns j_errno or -EIO if any
 * buffer came back non-uptodate.
 */
static int write_ordered_buffers(spinlock_t * lock,
				 struct reiserfs_journal *j,
				 struct reiserfs_journal_list *jl,
				 struct list_head *list)
{
	struct buffer_head *bh;
	struct reiserfs_jh *jh;
	int ret = j->j_errno;
	struct buffer_chunk chunk;
	struct list_head tmp;
	INIT_LIST_HEAD(&tmp);

	chunk.nr = 0;
	spin_lock(lock);
	/* pass 1: submit dirty buffers, parking submitted ones on tmp */
	while (!list_empty(list)) {
		jh = JH_ENTRY(list->next);
		bh = jh->bh;
		get_bh(bh);
		if (!trylock_buffer(bh)) {
			if (!buffer_dirty(bh)) {
				/* in flight already; wait for it in pass 2 */
				list_move(&jh->list, &tmp);
				goto loop_next;
			}
			/* dirty and locked: flush our chunk, then wait */
			spin_unlock(lock);
			if (chunk.nr)
				write_ordered_chunk(&chunk);
			wait_on_buffer(bh);
			cond_resched();
			spin_lock(lock);
			goto loop_next;
		}
		/*
		 * in theory, dirty non-uptodate buffers should never get here,
		 * but the upper layer io error paths still have a few quirks.
		 * Handle them here as gracefully as we can
		 */
		if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
			clear_buffer_dirty(bh);
			ret = -EIO;
		}
		if (buffer_dirty(bh)) {
			list_move(&jh->list, &tmp);
			add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
		} else {
			/* clean: nothing to write, release immediately */
			reiserfs_free_jh(bh);
			unlock_buffer(bh);
		}
loop_next:
		put_bh(bh);
		cond_resched_lock(lock);
	}
	/* submit any partial chunk left over */
	if (chunk.nr) {
		spin_unlock(lock);
		write_ordered_chunk(&chunk);
		spin_lock(lock);
	}
	/* pass 2: wait for everything parked on tmp and collect errors */
	while (!list_empty(&tmp)) {
		jh = JH_ENTRY(tmp.prev);
		bh = jh->bh;
		get_bh(bh);
		reiserfs_free_jh(bh);

		if (buffer_locked(bh)) {
			spin_unlock(lock);
			wait_on_buffer(bh);
			spin_lock(lock);
		}
		if (!buffer_uptodate(bh)) {
			ret = -EIO;
		}
		/*
		 * ugly interaction with invalidatepage here.
		 * reiserfs_invalidate_page will pin any buffer that has a
		 * valid journal head from an older transaction.  If someone
		 * else sets our buffer dirty after we write it in the first
		 * loop, and then someone truncates the page away, nobody
		 * will ever write the buffer. We're safe if we write the
		 * page one last time after freeing the journal header.
		 */
		if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
			spin_unlock(lock);
			ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
			spin_lock(lock);
		}
		put_bh(bh);
		cond_resched_lock(lock);
	}
	spin_unlock(lock);
	return ret;
}
Linus Torvalds's avatar
Linus Torvalds committed
883

884 885 886 887 888 889 890
/*
 * Flush the commit blocks of every journal list older than jl.
 * Returns 1 if jl itself disappeared while flushing (caller must bail
 * out), 0 otherwise.
 */
static int flush_older_commits(struct super_block *s,
			       struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	struct reiserfs_journal_list *other_jl;
	struct reiserfs_journal_list *first_jl;
	struct list_head *entry;
	unsigned int trans_id = jl->j_trans_id;
	unsigned int other_trans_id;
	unsigned int first_trans_id;

find_first:
	/*
	 * first we walk backwards to find the oldest uncommitted transaction
	 */
	first_jl = jl;
	entry = jl->j_list.prev;
	while (1) {
		other_jl = JOURNAL_LIST_ENTRY(entry);
		if (entry == &journal->j_journal_list ||
		    atomic_read(&other_jl->j_older_commits_done))
			break;

		first_jl = other_jl;
		entry = other_jl->j_list.prev;
	}

	/* if we didn't find any older uncommitted transactions, return now */
	if (first_jl == jl) {
		return 0;
	}

	/* NOTE(review): assigned but never read below */
	first_trans_id = first_jl->j_trans_id;

	/* now walk forward from the oldest, flushing each commit list */
	entry = &first_jl->j_list;
	while (1) {
		other_jl = JOURNAL_LIST_ENTRY(entry);
		other_trans_id = other_jl->j_trans_id;

		if (other_trans_id < trans_id) {
			if (atomic_read(&other_jl->j_commit_left) != 0) {
				flush_commit_list(s, other_jl, 0);

				/* list we were called with is gone, return */
				if (!journal_list_still_alive(s, trans_id))
					return 1;

				/*
				 * the one we just flushed is gone, this means
				 * all older lists are also gone, so first_jl
				 * is no longer valid either.  Go back to the
				 * beginning.
				 */
				if (!journal_list_still_alive
				    (s, other_trans_id)) {
					goto find_first;
				}
			}
			entry = entry->next;
			if (entry == &journal->j_journal_list)
				return 0;
		} else {
			return 0;
		}
	}
	/* not reached: the loop above always returns */
	return 0;
}
Adrian Bunk's avatar
Adrian Bunk committed
951 952

static int reiserfs_async_progress_wait(struct super_block *s)
953 954
{
	struct reiserfs_journal *j = SB_JOURNAL(s);
955 956

	if (atomic_read(&j->j_async_throttle)) {
957 958 959
		int depth;

		depth = reiserfs_write_unlock_nested(s);
960
		congestion_wait(BLK_RW_ASYNC, HZ / 10);
961
		reiserfs_write_lock_nested(s, depth);
962 963
	}

964
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
965 966 967
}

/*
 * if this journal list still has commit blocks unflushed, send them to disk.
 *
 * log areas must be flushed in order (transaction 2 can't commit before
 * transaction 1) Before the commit block can by written, every other log
 * block must be safely on disk
 *
 * Returns 0 on success or an error (e.g. -EIO on a failed buffer write,
 * which also aborts the journal via reiserfs_abort() below).
 */
static int flush_commit_list(struct super_block *s,
			     struct reiserfs_journal_list *jl, int flushall)
{
	int i;
	b_blocknr_t bn;
	struct buffer_head *tbh = NULL;
	unsigned int trans_id = jl->j_trans_id;
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	int retval = 0;
	int write_len;
	int depth;

	reiserfs_check_lock_depth(s, "flush_commit_list");

	/* all older commits already done means this one is too */
	if (atomic_read(&jl->j_older_commits_done)) {
		return 0;
	}

	/*
	 * before we can put our commit blocks on disk, we have to make
	 * sure everyone older than us is on disk too
	 */
	BUG_ON(jl->j_len <= 0);
	BUG_ON(trans_id == journal->j_trans_id);

	/* hold a reference so the list can't vanish under us */
	get_journal_list(jl);
	if (flushall) {
		if (flush_older_commits(s, jl) == 1) {
			/*
			 * list disappeared during flush_older_commits.
			 * return
			 */
			goto put_jl;
		}
	}

	/* make sure nobody is trying to flush this one at the same time */
	reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);

	/* someone may have committed it while we slept on the mutex */
	if (!journal_list_still_alive(s, trans_id)) {
		mutex_unlock(&jl->j_commit_mutex);
		goto put_jl;
	}
	BUG_ON(jl->j_trans_id == 0);

	/* this commit is done, exit */
	if (atomic_read(&jl->j_commit_left) <= 0) {
		if (flushall) {
			atomic_set(&jl->j_older_commits_done, 1);
		}
		mutex_unlock(&jl->j_commit_mutex);
		goto put_jl;
	}

	/* write out any data=ordered buffers queued on this transaction */
	if (!list_empty(&jl->j_bh_list)) {
		int ret;

		/*
		 * We might sleep in numerous places inside
		 * write_ordered_buffers. Relax the write lock.
		 */
		depth = reiserfs_write_unlock_nested(s);
		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
					    journal, jl, &jl->j_bh_list);
		if (ret < 0 && retval == 0)
			retval = ret;
		reiserfs_write_lock_nested(s, depth);
	}
	BUG_ON(!list_empty(&jl->j_bh_list));
	/*
	 * for the description block and all the log blocks, submit any buffers
	 * that haven't already reached the disk.  Try to write at least 256
	 * log blocks. later on, we will only wait on blocks that correspond
	 * to this transaction, but while we're unplugging we might as well
	 * get a chunk of data on there.
	 */
	atomic_inc(&journal->j_async_throttle);
	write_len = jl->j_len + 1;
	if (write_len < 256)
		write_len = 256;
	for (i = 0 ; i < write_len ; i++) {
		/* log area is a circular buffer: wrap at journal size */
		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
		    SB_ONDISK_JOURNAL_SIZE(s);
		tbh = journal_find_get_block(s, bn);
		if (tbh) {
			if (buffer_dirty(tbh)) {
		            depth = reiserfs_write_unlock_nested(s);
			    ll_rw_block(REQ_OP_WRITE, 0, 1, &tbh);
			    reiserfs_write_lock_nested(s, depth);
			}
			put_bh(tbh) ;
		}
	}
	atomic_dec(&journal->j_async_throttle);

	/* now wait on (only) this transaction's log blocks */
	for (i = 0; i < (jl->j_len + 1); i++) {
		bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
		/*
		 * NOTE(review): tbh is not NULL-checked here, unlike the
		 * submit loop above — presumably do_journal_end's getblk
		 * guarantees these buffers exist; confirm.
		 */
		tbh = journal_find_get_block(s, bn);

		depth = reiserfs_write_unlock_nested(s);
		__wait_on_buffer(tbh);
		reiserfs_write_lock_nested(s, depth);
		/*
		 * since we're using ll_rw_blk above, it might have skipped
		 * over a locked buffer.  Double check here
		 */
		/* redundant, sync_dirty_buffer() checks */
		if (buffer_dirty(tbh)) {
			depth = reiserfs_write_unlock_nested(s);
			sync_dirty_buffer(tbh);
			reiserfs_write_lock_nested(s, depth);
		}
		if (unlikely(!buffer_uptodate(tbh))) {
#ifdef CONFIG_REISERFS_CHECK
			reiserfs_warning(s, "journal-601",
					 "buffer write failed");
#endif
			retval = -EIO;
		}
		/* once for journal_find_get_block */
		put_bh(tbh);
		/* once due to original getblk in do_journal_end */
		put_bh(tbh);
		atomic_dec(&jl->j_commit_left);
	}

	/* only the commit block itself should remain outstanding */
	BUG_ON(atomic_read(&jl->j_commit_left) != 1);

	/*
	 * If there was a write error in the journal - we can't commit
	 * this transaction - it will be invalid and, if successful,
	 * will just end up propagating the write error out to
	 * the file system.
	 */
	if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
		if (buffer_dirty(jl->j_commit_bh))
			BUG();
		mark_buffer_dirty(jl->j_commit_bh) ;
		depth = reiserfs_write_unlock_nested(s);
		/* commit block write uses flush/FUA barrier when enabled */
		if (reiserfs_barrier_flush(s))
			__sync_dirty_buffer(jl->j_commit_bh,
					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
		else
			sync_dirty_buffer(jl->j_commit_bh);
		reiserfs_write_lock_nested(s, depth);
	}

	/*
	 * If there was a write error in the journal - we can't commit this
	 * transaction - it will be invalid and, if successful, will just end
	 * up propagating the write error out to the filesystem.
	 */
	if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
#ifdef CONFIG_REISERFS_CHECK
		reiserfs_warning(s, "journal-615", "buffer write failed");
#endif
		retval = -EIO;
	}
	bforget(jl->j_commit_bh);
	/* sanity: commits must hit disk in strictly sequential trans_id order */
	if (journal->j_last_commit_id != 0 &&
	    (jl->j_trans_id - journal->j_last_commit_id) != 1) {
		reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
				 journal->j_last_commit_id, jl->j_trans_id);
	}
	journal->j_last_commit_id = jl->j_trans_id;

	/*
	 * now, every commit block is on the disk.  It is safe to allow
	 * blocks freed during this transaction to be reallocated
	 */
	cleanup_freed_for_journal_list(s, jl);

	retval = retval ? retval : journal->j_errno;

	/* mark the metadata dirty */
	if (!retval)
		dirty_one_transaction(s, jl);
	atomic_dec(&jl->j_commit_left);

	if (flushall) {
		atomic_set(&jl->j_older_commits_done, 1);
	}
	mutex_unlock(&jl->j_commit_mutex);
put_jl:
	put_journal_list(s, jl);

	if (retval)
		reiserfs_abort(s, retval, "Journal write error in %s",
			       __func__);
	return retval;
}

/*
1168 1169 1170
 * flush_journal_list frequently needs to find a newer transaction for a
 * given block.  This does that, or returns NULL if it can't find anything
 */
1171 1172 1173 1174 1175 1176
static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
							  reiserfs_journal_cnode
							  *cn)
{
	struct super_block *sb = cn->sb;
	b_blocknr_t blocknr = cn->blocknr;
Linus Torvalds's avatar
Linus Torvalds committed
1177

1178 1179 1180 1181 1182 1183 1184 1185
	cn = cn->hprev;
	while (cn) {
		if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
			return cn->jlist;
		}
		cn = cn->hprev;
	}
	return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1186 1187
}

1188 1189 1190 1191
/* forward declaration — defined later in this file */
static void remove_journal_hash(struct super_block *,
				struct reiserfs_journal_cnode **,
				struct reiserfs_journal_list *, unsigned long,
				int);
Linus Torvalds's avatar
Linus Torvalds committed
1192 1193

/*
1194 1195 1196 1197 1198
 * once all the real blocks have been flushed, it is safe to remove them
 * from the journal list for this transaction.  Aside from freeing the
 * cnode, this also allows the block to be reallocated for data blocks
 * if it had been deleted.
 */
1199
static void remove_all_from_journal_list(struct super_block *sb,
1200 1201 1202
					 struct reiserfs_journal_list *jl,
					 int debug)
{
1203
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1204 1205 1206
	struct reiserfs_journal_cnode *cn, *last;
	cn = jl->j_realblock;

1207 1208 1209
	/*
	 * which is better, to lock once around the whole loop, or
	 * to lock for each call to remove_journal_hash?
1210 1211 1212 1213
	 */
	while (cn) {
		if (cn->blocknr != 0) {
			if (debug) {
1214
				reiserfs_warning(sb, "reiserfs-2201",
1215 1216 1217 1218 1219
						 "block %u, bh is %d, state %ld",
						 cn->blocknr, cn->bh ? 1 : 0,
						 cn->state);
			}
			cn->state = 0;
1220
			remove_journal_hash(sb, journal->j_list_hash_table,
1221 1222 1223 1224
					    jl, cn->blocknr, 1);
		}
		last = cn;
		cn = cn->next;
1225
		free_cnode(sb, last);
1226 1227
	}
	jl->j_realblock = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1228 1229 1230
}

/*
 * if this timestamp is greater than the timestamp we wrote last to the
 * header block, write it to the header block.  once this is done, I can
 * safely say the log area for this transaction won't ever be replayed,
 * and I can start releasing blocks in this transaction for reuse as data
 * blocks.  called by flush_journal_list, before it calls
 * remove_all_from_journal_list
 *
 * Returns 0 on success, -EIO if the journal is aborted or the header
 * buffer write fails.
 */
static int _update_journal_header_block(struct super_block *sb,
					unsigned long offset,
					unsigned int trans_id)
{
	struct reiserfs_journal_header *jh;
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
	int depth;

	if (reiserfs_is_journal_aborted(journal))
		return -EIO;

	if (trans_id >= journal->j_last_flush_trans_id) {
		if (buffer_locked((journal->j_header_bh))) {
			/* drop the write lock while in-flight header I/O completes */
			depth = reiserfs_write_unlock_nested(sb);
			__wait_on_buffer(journal->j_header_bh);
			reiserfs_write_lock_nested(sb, depth);
			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
#ifdef CONFIG_REISERFS_CHECK
				reiserfs_warning(sb, "journal-699",
						 "buffer write failed");
#endif
				return -EIO;
			}
		}
		/* update the in-memory copy, then the on-disk header fields */
		journal->j_last_flush_trans_id = trans_id;
		journal->j_first_unflushed_offset = offset;
		jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
							b_data);
		jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
		jh->j_first_unflushed_offset = cpu_to_le32(offset);
		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);

		set_buffer_dirty(journal->j_header_bh);
		depth = reiserfs_write_unlock_nested(sb);

		/* write the header out; use a flush/FUA barrier when enabled */
		if (reiserfs_barrier_flush(sb))
			__sync_dirty_buffer(journal->j_header_bh,
					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
		else
			sync_dirty_buffer(journal->j_header_bh);

		reiserfs_write_lock_nested(sb, depth);
		if (!buffer_uptodate(journal->j_header_bh)) {
			reiserfs_warning(sb, "journal-837",
					 "IO error during journal replay");
			return -EIO;
		}
	}
	return 0;
}

1289
/*
 * Record in the on-disk journal header that everything up to @trans_id
 * has been flushed; thin wrapper around _update_journal_header_block().
 * Returns 0 on success or -EIO.
 */
static int update_journal_header_block(struct super_block *sb,
				       unsigned long offset,
				       unsigned int trans_id)
{
	return _update_journal_header_block(sb, offset, trans_id);
}
1295

1296 1297
/*
** flush any and all journal lists older than you are
Linus Torvalds's avatar
Linus Torvalds committed
1298 1299
** can only be called from flush_journal_list
*/
1300
static int flush_older_journal_lists(struct super_block *sb,
1301 1302 1303 1304
				     struct reiserfs_journal_list *jl)
{
	struct list_head *entry;
	struct reiserfs_journal_list *other_jl;
1305
	struct reiserfs_journal *journal = SB_JOURNAL(sb);
1306
	unsigned int trans_id = jl->j_trans_id;
1307

1308 1309
	/*
	 * we know we are the only ones flushing things, no extra race
1310 1311
	 * protection is required.
	 */
1312
restart:
1313 1314 1315 1316 1317 1318 1319 1320
	entry = journal->j_journal_list.next;
	/* Did we wrap? */
	if (entry == &journal->j_journal_list)
		return 0;
	other_jl = JOURNAL_LIST_ENTRY(entry);
	if (other_jl->j_trans_id < trans_id) {
		BUG_ON(other_jl->j_refcount <= 0);
		/* do not flush all */
1321
		flush_journal_list(sb, other_jl, 0);
1322 1323 1324 1325 1326

		/* other_jl is now deleted from the list */
		goto restart;
	}
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1327 1328 1329
}

static void del_from_work_list(struct super_block *s,
1330 1331 1332 1333 1334 1335 1336
			       struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	if (!list_empty(&jl->j_working_list)) {
		list_del_init(&jl->j_working_list);
		journal->j_num_work_lists--;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1337 1338
}

1339 1340 1341 1342 1343 1344 1345 1346 1347 1348
/*
 * flush a journal list, both commit and real blocks
 *
 * always set flushall to 1, unless you are calling from inside
 * flush_journal_list
 *
 * IMPORTANT.  This can only be called while there are no journal writers,
 * and the journal is locked.  That means it can only be called from
 * do_journal_end, or by journal_release
 */
1349 1350
static int flush_journal_list(struct super_block *s,
			      struct reiserfs_journal_list *jl, int flushall)
Linus Torvalds's avatar
Linus Torvalds committed
1351
{
1352 1353 1354 1355 1356 1357 1358 1359 1360
	struct reiserfs_journal_list *pjl;
	struct reiserfs_journal_cnode *cn, *last;
	int count;
	int was_jwait = 0;
	int was_dirty = 0;
	struct buffer_head *saved_bh;
	unsigned long j_len_saved = jl->j_len;
	struct reiserfs_journal *journal = SB_JOURNAL(s);
	int err = 0;
1361
	int depth;
1362 1363 1364 1365

	BUG_ON(j_len_saved <= 0);

	if (atomic_read(&journal->j_wcount) != 0) {
1366
		reiserfs_warning(s, "clm-2048", "called with wcount %d",
1367 1368
				 atomic_read(&journal->j_wcount));
	}
Linus Torvalds's avatar
Linus Torvalds committed
1369