pnode.c 15.3 KB
Newer Older
1 2 3 4 5 6 7 8
/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Released under GPL v2.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 *
 */
9
#include <linux/mnt_namespace.h>
10 11
#include <linux/mount.h>
#include <linux/fs.h>
12
#include <linux/nsproxy.h>
13
#include <uapi/linux/mount.h>
14
#include "internal.h"
15 16
#include "pnode.h"

17
/* return the next shared peer mount of @p */
18
static inline struct mount *next_peer(struct mount *p)
19
{
20
	return list_entry(p->mnt_share.next, struct mount, mnt_share);
21 22
}

23
static inline struct mount *first_slave(struct mount *p)
24
{
25
	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
26 27
}

28 29 30 31 32
/* Return the last entry on @p's ->mnt_slave_list (caller checks non-empty). */
static inline struct mount *last_slave(struct mount *p)
{
	struct list_head *tail = p->mnt_slave_list.prev;

	return list_entry(tail, struct mount, mnt_slave);
}

33
static inline struct mount *next_slave(struct mount *p)
34
{
35
	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
36 37
}

38 39 40
static struct mount *get_peer_under_root(struct mount *mnt,
					 struct mnt_namespace *ns,
					 const struct path *root)
41
{
42
	struct mount *m = mnt;
43 44 45

	do {
		/* Check the namespace first for optimization */
46
		if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
47
			return m;
48

49
		m = next_peer(m);
50
	} while (m != mnt);
51 52 53 54 55 56 57 58 59 60

	return NULL;
}

/*
 * Get ID of closest dominating peer group having a representative
 * under the given root.
 *
 * Caller must hold namespace_sem
 */
61
int get_dominating_id(struct mount *mnt, const struct path *root)
62
{
63
	struct mount *m;
64

65
	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
66
		struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
67
		if (d)
Al Viro's avatar
Al Viro committed
68
			return d->mnt_group_id;
69 70 71 72 73
	}

	return 0;
}

74
static int do_make_slave(struct mount *mnt)
Ram Pai's avatar
Ram Pai committed
75
{
Al Viro's avatar
Al Viro committed
76
	struct mount *master, *slave_mnt;
Ram Pai's avatar
Ram Pai committed
77

Al Viro's avatar
Al Viro committed
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
	if (list_empty(&mnt->mnt_share)) {
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
Ram Pai's avatar
Ram Pai committed
94
	} else {
Al Viro's avatar
Al Viro committed
95 96 97 98 99 100 101 102 103 104 105
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
Ram Pai's avatar
Ram Pai committed
106
		}
Al Viro's avatar
Al Viro committed
107 108 109
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
Ram Pai's avatar
Ram Pai committed
110
	}
Al Viro's avatar
Al Viro committed
111 112 113 114 115
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
116
	mnt->mnt_master = master;
Ram Pai's avatar
Ram Pai committed
117 118 119
	return 0;
}

Nick Piggin's avatar
Nick Piggin committed
120 121 122
/*
 * vfsmount lock must be held for write
 */
123
void change_mnt_propagation(struct mount *mnt, int type)
124
{
125
	if (type == MS_SHARED) {
126
		set_mnt_shared(mnt);
Ram Pai's avatar
Ram Pai committed
127 128
		return;
	}
129
	do_make_slave(mnt);
Ram Pai's avatar
Ram Pai committed
130
	if (type != MS_SLAVE) {
131
		list_del_init(&mnt->mnt_slave);
132
		mnt->mnt_master = NULL;
Ram Pai's avatar
Ram Pai committed
133
		if (type == MS_UNBINDABLE)
134
			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
Andries E. Brouwer's avatar
Andries E. Brouwer committed
135
		else
136
			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
137
	}
138
}
139 140 141 142 143

/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
144 145 146 147 148
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if
 * vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
149
 */
150 151
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
152
{
153
	/* are there any slaves of this mount? */
154
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
155 156 157
		return first_slave(m);

	while (1) {
158
		struct mount *master = m->mnt_master;
159

160
		if (master == origin->mnt_master) {
161 162
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
163
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
164 165 166 167 168 169 170
			return next_slave(m);

		/* back at master */
		m = master;
	}
}

171 172 173 174 175 176 177 178 179 180 181 182 183
/*
 * Advance @m so that a following propagation_next() call will not return
 * the slaves of @m: if @m has slaves, return the last of them, otherwise
 * return @m unchanged.  @origin is not used.
 */
static struct mount *skip_propagation_subtree(struct mount *m,
						struct mount *origin)
{
	if (IS_MNT_NEW(m) || list_empty(&m->mnt_slave_list))
		return m;

	return last_slave(m);
}

Al Viro's avatar
Al Viro committed
184
static struct mount *next_group(struct mount *m, struct mount *origin)
185
{
Al Viro's avatar
Al Viro committed
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
	while (1) {
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
213
	}
Al Viro's avatar
Al Viro committed
214
}
215

Al Viro's avatar
Al Viro committed
216 217
/*
 * Working state that propagate_mnt() sets up for propagate_one().
 * All accesses are serialized by namespace_sem.
 */
/* user namespace of the current task's mount namespace */
static struct user_namespace *user_ns;
/* destination the most recent copy was attached to (initially dest_mnt) */
static struct mount *last_dest;
/* the tree being propagated (source_mnt) */
static struct mount *first_source;
/* most recent copy made (initially source_mnt) */
static struct mount *last_source;
/* ->mnt_master of the original destination mount */
static struct mount *dest_master;
/* mountpoint the copies are attached at */
static struct mountpoint *mp;
/* caller's list that collects the new copies */
static struct hlist_head *list;

222 223 224 225 226
static inline bool peers(struct mount *m1, struct mount *m2)
{
	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}

Al Viro's avatar
Al Viro committed
227 228 229 230 231 232 233 234 235 236
static int propagate_one(struct mount *m)
{
	struct mount *child;
	int type;
	/* skip ones added by this propagate_mnt() */
	if (IS_MNT_NEW(m))
		return 0;
	/* skip if mountpoint isn't covered by it */
	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
		return 0;
237
	if (peers(m, last_dest)) {
Al Viro's avatar
Al Viro committed
238 239 240
		type = CL_MAKE_SHARED;
	} else {
		struct mount *n, *p;
241
		bool done;
Al Viro's avatar
Al Viro committed
242 243
		for (n = m; ; n = p) {
			p = n->mnt_master;
244
			if (p == dest_master || IS_MNT_MARKED(p))
Al Viro's avatar
Al Viro committed
245
				break;
246
		}
247 248 249 250 251 252 253 254 255 256
		do {
			struct mount *parent = last_source->mnt_parent;
			if (last_source == first_source)
				break;
			done = parent->mnt_master == p;
			if (done && peers(n, parent))
				break;
			last_source = last_source->mnt_master;
		} while (!done);

Al Viro's avatar
Al Viro committed
257 258 259 260
		type = CL_SLAVE;
		/* beginning of peer group among the slaves? */
		if (IS_MNT_SHARED(m))
			type |= CL_MAKE_SHARED;
261
	}
Al Viro's avatar
Al Viro committed
262 263 264 265 266 267 268
		
	/* Notice when we are propagating across user namespaces */
	if (m->mnt_ns->user_ns != user_ns)
		type |= CL_UNPRIVILEGED;
	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
	if (IS_ERR(child))
		return PTR_ERR(child);
269
	child->mnt.mnt_flags &= ~MNT_LOCKED;
Al Viro's avatar
Al Viro committed
270 271 272 273 274 275 276 277 278
	mnt_set_mountpoint(m, mp, child);
	last_dest = m;
	last_source = child;
	if (m->mnt_master != dest_master) {
		read_seqlock_excl(&mount_lock);
		SET_MNT_MARK(m->mnt_master);
		read_sequnlock_excl(&mount_lock);
	}
	hlist_add_head(&child->mnt_hash, list);
279
	return count_mounts(m->mnt_ns, child);
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
}

/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * dentry 'dest_dentry'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_dentry: destination dentry.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
295
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
Al Viro's avatar
Al Viro committed
296
		    struct mount *source_mnt, struct hlist_head *tree_list)
297
{
Al Viro's avatar
Al Viro committed
298
	struct mount *m, *n;
299
	int ret = 0;
300

Al Viro's avatar
Al Viro committed
301 302 303 304 305 306 307
	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	user_ns = current->nsproxy->mnt_ns->user_ns;
	last_dest = dest_mnt;
308
	first_source = source_mnt;
Al Viro's avatar
Al Viro committed
309 310 311 312 313 314 315 316 317
	last_source = source_mnt;
	mp = dest_mp;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n);
		if (ret)
318
			goto out;
Al Viro's avatar
Al Viro committed
319
	}
320

Al Viro's avatar
Al Viro committed
321 322 323 324 325 326 327 328 329 330 331
	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
332 333
	}
out:
Al Viro's avatar
Al Viro committed
334 335 336 337 338
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
339
	}
Al Viro's avatar
Al Viro committed
340
	read_sequnlock_excl(&mount_lock);
341 342
	return ret;
}
343

344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
/*
 * If there is exactly one mount on @mnt and it sits on @mnt's root
 * (i.e. covers @mnt completely), return it; otherwise return NULL.
 */
static struct mount *find_topper(struct mount *mnt)
{
	struct mount *only;

	if (list_is_singular(&mnt->mnt_mounts)) {
		only = list_first_entry(&mnt->mnt_mounts, struct mount,
					mnt_child);
		if (only->mnt_mountpoint == mnt->mnt.mnt_root)
			return only;
	}

	return NULL;
}

359 360 361
/*
 * Return 1 if the reference count of @mnt is greater than @count,
 * 0 otherwise.
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	if (mnt_get_count(mnt) > count)
		return 1;
	return 0;
}

/*
 * check if the mount 'mnt' can be unmounted successfully.
 * @mnt: the mount to be checked for unmount
 * NOTE: unmounting 'mnt' would naturally propagate to all
 * other mounts its parent propagates to.
 * Check if any of these mounts that **do not have submounts**
 * have more references than 'refcnt'. If so return busy.
Nick Piggin's avatar
Nick Piggin committed
374
 *
Nick Piggin's avatar
Nick Piggin committed
375
 * vfsmount lock must be held for write
376
 */
377
int propagate_mount_busy(struct mount *mnt, int refcnt)
378
{
379
	struct mount *m, *child, *topper;
380
	struct mount *parent = mnt->mnt_parent;
381

382
	if (mnt == parent)
383 384 385 386 387 388 389
		return do_refcount_check(mnt, refcnt);

	/*
	 * quickly check if the current mount can be unmounted.
	 * If not, we don't have to go checking for all other
	 * mounts
	 */
390
	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
391 392
		return 1;

393 394
	for (m = propagation_next(parent, parent); m;
	     		m = propagation_next(m, parent)) {
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410
		int count = 1;
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
		if (!child)
			continue;

		/* Is there exactly one mount on the child that covers
		 * it completely whose reference should be ignored?
		 */
		topper = find_topper(child);
		if (topper)
			count += 1;
		else if (!list_empty(&child->mnt_mounts))
			continue;

		if (do_refcount_check(child, count))
			return 1;
411
	}
412
	return 0;
413 414
}

415 416 417 418 419 420 421 422 423 424 425 426 427 428
/*
 * Clear MNT_LOCKED when it can be shown to be safe.
 *
 * mount_lock lock must be held for write
 */
void propagate_mount_unlock(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m, *child;

	BUG_ON(parent == mnt);

	for (m = propagation_next(parent, parent); m;
			m = propagation_next(m, parent)) {
429
		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
430 431 432 433 434
		if (child)
			child->mnt.mnt_flags &= ~MNT_LOCKED;
	}
}

435
static void umount_one(struct mount *mnt, struct list_head *to_umount)
436
{
437 438 439 440 441
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
442 443
}

444 445 446 447
/*
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
448 449 450
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
451
{
452 453
	bool progress = false;
	struct mount *child;
454

455 456 457 458 459 460
	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;
461

462 463 464 465 466
	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
467
			continue;
468 469 470 471 472
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}
473

474 475 476
	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;
477

478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
	/* If a mount is without children and not locked umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}

/*
 * For each mount on @to_umount, deal with its remaining children: a
 * child mounted on the mount's root is moved to @to_restore, any other
 * child is unmounted too.  umount_one() appends to @to_umount, so such
 * children are themselves reached later in the same walk.
 */
static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *victim, *kid, *next;

	list_for_each_entry(victim, to_umount, mnt_list) {
		list_for_each_entry_safe(kid, next, &victim->mnt_mounts,
					 mnt_child) {
			if (kid->mnt_mountpoint != victim->mnt.mnt_root) {
				umount_one(kid, to_umount);
				continue;
			}
			/* topper */
			list_move_tail(&kid->mnt_umounting, to_restore);
		}
	}
}

504
static void restore_mounts(struct list_head *to_restore)
505
{
506 507
	/* Restore mounts to a clean working state */
	while (!list_empty(to_restore)) {
508 509 510
		struct mount *mnt, *parent;
		struct mountpoint *mp;

511 512 513
		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
		CLEAR_MNT_MARK(mnt);
		list_del_init(&mnt->mnt_umounting);
514

515
		/* Should this mount be reparented? */
516 517 518 519 520 521
		mp = mnt->mnt_mp;
		parent = mnt->mnt_parent;
		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
			mp = parent->mnt_mp;
			parent = parent->mnt_parent;
		}
522 523
		if (parent != mnt->mnt_parent)
			mnt_change_mountpoint(parent, mp, mnt);
524 525 526
	}
}

527 528 529 530 531 532 533 534 535
static void cleanup_umount_visitations(struct list_head *visited)
{
	while (!list_empty(visited)) {
		struct mount *mnt =
			list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&mnt->mnt_umounting);
	}
}

536 537 538 539
/*
 * collect all mounts that receive propagation from the mount in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
Nick Piggin's avatar
Nick Piggin committed
540 541
 *
 * vfsmount lock must be held for write
542
 */
543
int propagate_umount(struct list_head *list)
544
{
545
	struct mount *mnt;
546 547
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
548
	LIST_HEAD(visited);
549

550 551
	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
552 553
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;
554

555 556 557 558 559 560 561 562 563 564
		/*
		 * If this mount has already been visited it is known that it's
		 * entire peer group and all of their slaves in the propagation
		 * tree for the mountpoint has already been visited and there is
		 * no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
565 566 567 568 569 570 571
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited it is
				 * know that it's entire peer group and all of
				 * their slaves in the propgation tree for the
				 * mountpoint has already been visited and there
				 * is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come accross an partially unmounted
				 * mount in list that has not been visited yet.
				 * Remember it has been visited and continue
				 * about our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

593 594 595 596 597 598 599 600 601 602
			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}
603

604 605
	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
606
	cleanup_umount_visitations(&visited);
607
	list_splice_tail(&to_umount, list);
608

609 610
	return 0;
}