/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
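
/* An illustrative sketch (not taken from this file) of the acquire/check/
 * release pattern described above, with the argument setup for
 * bpf_sk_lookup_tcp() elided:
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                     // R0 is PTR_TO_SOCKET_OR_NULL, ref acquired
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // NULL branch: verifier drops the ref
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),   // non-NULL branch: R0/R1 are PTR_TO_SOCKET
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *                                     // ref released; omitting this call is rejected
 *    BPF_EXIT_INSN(),
 */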

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_INSNS	131072
#define BPF_COMPLEXITY_LIMIT_STACK	1024
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_state = (unsigned long)map |
			 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}
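
/* A minimal usage sketch for the helpers above (values hypothetical):
 *
 *	bpf_map_ptr_store(aux, map, false);	// map_state == (unsigned long)map
 *	bpf_map_ptr_store(aux, map, true);	// low bit now set: unpriv access seen
 *	BPF_MAP_PTR(aux->map_state);		// masks the flag, yields 'map' again
 *	bpf_map_ptr_unpriv(aux);		// true, and stays true (flag is OR-ed in)
 *
 * Storing BPF_MAP_PTR_POISON instead makes bpf_map_ptr_poisoned() return true,
 * and the BUILD_BUG_ON() guarantees the poison value cannot alias the flag bit.
 */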

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
	s64 msize_smax_value;
	u64 msize_umax_value;
	int ptr_id;
};

static DEFINE_MUTEX(bpf_verifier_lock);

static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
	const struct bpf_line_info *linfo;
	const struct bpf_prog *prog;
	u32 i, nr_linfo;

	prog = env->prog;
	nr_linfo = prog->aux->nr_linfo;

	if (!nr_linfo || insn_off >= prog->len)
		return NULL;

	linfo = prog->aux->linfo;
	for (i = 1; i < nr_linfo; i++)
		if (insn_off < linfo[i].insn_off)
			break;

	return &linfo[i - 1];
}

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';

	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

static const char *ltrim(const char *s)
{
	while (isspace(*s))
		s++;

	return s;
}

__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
					 u32 insn_off,
					 const char *prefix_fmt, ...)
{
	const struct bpf_line_info *linfo;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	linfo = find_linfo(env, insn_off);
	if (!linfo || linfo == env->prev_linfo)
		return;

	if (prefix_fmt) {
		va_list args;

		va_start(args, prefix_fmt);
		bpf_verifier_vlog(&env->log, prefix_fmt, args);
		va_end(args);
	}

	verbose(env, "%s\n",
		ltrim(btf_name_by_offset(env->prog->aux->btf,
					 linfo->line_off)));

	env->prev_linfo = linfo;
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

static bool reg_type_may_be_null(enum bpf_reg_type type)
{
	return type == PTR_TO_MAP_VALUE_OR_NULL ||
	       type == PTR_TO_SOCKET_OR_NULL;
}

static bool type_is_refcounted(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET;
}

static bool type_is_refcounted_or_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
}

static bool reg_is_refcounted(const struct bpf_reg_state *reg)
{
	return type_is_refcounted(reg->type);
}

static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
{
	return type_is_refcounted_or_null(reg->type);
}

static bool arg_type_is_refcounted(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_SOCKET;
}

/* Determine whether the function releases some resources allocated by another
 * function call. The first reference type argument will be assumed to be
 * released by release_reference().
 */
static bool is_release_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_release;
}

/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
	[SCALAR_VALUE]		= "inv",
	[PTR_TO_CTX]		= "ctx",
	[CONST_PTR_TO_MAP]	= "map_ptr",
	[PTR_TO_MAP_VALUE]	= "map_value",
	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
	[PTR_TO_STACK]		= "fp",
	[PTR_TO_PACKET]		= "pkt",
	[PTR_TO_PACKET_META]	= "pkt_meta",
	[PTR_TO_PACKET_END]	= "pkt_end",
	[PTR_TO_FLOW_KEYS]	= "flow_keys",
	[PTR_TO_SOCKET]		= "sock",
	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
};

static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};

static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
	    verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
	if (live & REG_LIVE_DONE)
		verbose(env, "D");
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}

static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=%s", reg_type_str[t]);
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%lld", reg->var_off.value + reg->off);
			if (t == PTR_TO_STACK)
				verbose(env, ",call_%d", func(env, reg)->callsite);
		} else {
			verbose(env, "(id=%d", reg->id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (t == CONST_PTR_TO_MAP ||
				 t == PTR_TO_MAP_VALUE ||
				 t == PTR_TO_MAP_VALUE_OR_NULL)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
			}
			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[
					state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
		print_liveness(env, state->stack[i].spilled_ptr.live);
		if (state->stack[i].slot_type[0] == STACK_SPILL)
			verbose(env, "=%s",
				reg_type_str[state->stack[i].spilled_ptr.type]);
		else
			verbose(env, "=%s", types_buf);
	}
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	verbose(env, "\n");
}
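
/* For orientation, a dump produced by print_verifier_state() at the entry of a
 * program typically looks like (register numbers and values illustrative):
 *
 *	R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
 *
 * and later states may add entries such as "R2_w=inv5" for a written scalar,
 * " fp-8=mmmmmmmm" for a stack slot of STACK_MISC bytes or " fp-16=map_value"
 * for a spilled pointer, following reg_type_str[] and slot_type_char[] above.
 */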

#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
static int copy_##NAME##_state(struct bpf_func_state *dst,		\
			       const struct bpf_func_state *src)	\
{									\
	if (!src->FIELD)						\
		return 0;						\
	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
		/* internal bug, make state invalid to reject the program */ \
		memset(dst, 0, sizeof(*dst));				\
		return -EFAULT;						\
	}								\
	memcpy(dst->FIELD, src->FIELD,					\
	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
	return 0;							\
}
/* copy_reference_state() */
COPY_STATE_FN(reference, acquired_refs, refs, 1)
/* copy_stack_state() */
COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef COPY_STATE_FN
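
/* The two instantiations above generate, for example:
 *
 *	static int copy_reference_state(struct bpf_func_state *dst,
 *					const struct bpf_func_state *src);
 *	static int copy_stack_state(struct bpf_func_state *dst,
 *				    const struct bpf_func_state *src);
 *
 * with the macro body specialized to refs/acquired_refs and
 * stack/allocated_stack respectively; SIZE is 1 for references and
 * BPF_REG_SIZE for the stack, since allocated_stack counts bytes while the
 * stack[] array has one entry per BPF_REG_SIZE bytes.
 */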

#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
				  bool copy_old)			\
{									\
	u32 old_size = state->COUNT;					\
	struct bpf_##NAME##_state *new_##FIELD;				\
	int slot = size / SIZE;						\
									\
	if (size <= old_size || !size) {				\
		if (copy_old)						\
			return 0;					\
		state->COUNT = slot * SIZE;				\
		if (!size && old_size) {				\
			kfree(state->FIELD);				\
			state->FIELD = NULL;				\
		}							\
		return 0;						\
	}								\
	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
				    GFP_KERNEL);			\
	if (!new_##FIELD)						\
		return -ENOMEM;						\
	if (copy_old) {							\
		if (state->FIELD)					\
			memcpy(new_##FIELD, state->FIELD,		\
			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
		memset(new_##FIELD + old_size / SIZE, 0,		\
		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
	}								\
	state->COUNT = slot * SIZE;					\
	kfree(state->FIELD);						\
	state->FIELD = new_##FIELD;					\
	return 0;							\
}
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN

/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume minimal amount of memory. check_stack_write() access from
 * the program calls into realloc_func_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which realloc_stack_state() copies over. It points to previous
 * bpf_verifier_state which is never reallocated.
 */
static int realloc_func_state(struct bpf_func_state *state, int stack_size,
			      int refs_size, bool copy_old)
{
	int err = realloc_reference_state(state, refs_size, copy_old);
	if (err)
		return err;
	return realloc_stack_state(state, stack_size, copy_old);
}

/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_func_state *state = cur_func(env);
	int new_ofs = state->acquired_refs;
	int id, err;

	err = realloc_reference_state(state, state->acquired_refs + 1, true);
	if (err)
		return err;
	id = ++env->id_gen;
	state->refs[new_ofs].id = id;
	state->refs[new_ofs].insn_idx = insn_idx;

	return id;
}

/* release function corresponding to acquire_reference_state(). Idempotent. */
static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
{
	int i, last_idx;

	if (!ptr_id)
		return -EFAULT;

	last_idx = state->acquired_refs - 1;
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id == ptr_id) {
			if (last_idx && i != last_idx)
				memcpy(&state->refs[i], &state->refs[last_idx],
				       sizeof(*state->refs));
			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
			state->acquired_refs--;
			return 0;
		}
	}
	return -EFAULT;
}

/* variation on the above for cases where we expect that there must be an
 * outstanding reference for the specified ptr_id.
 */
static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
{
	struct bpf_func_state *state = cur_func(env);
	int err;

	err = __release_reference_state(state, ptr_id);
	if (WARN_ON_ONCE(err != 0))
		verbose(env, "verifier internal error: can't release reference\n");
	return err;
}

static int transfer_reference_state(struct bpf_func_state *dst,
				    struct bpf_func_state *src)
{
	int err = realloc_reference_state(dst, src->acquired_refs, false);
	if (err)
		return err;
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return 0;
}

static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
				 false);
	if (err)
		return err;
	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return copy_stack_state(dst, src);
}

static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	/* if dst has more stack frames then src frame, free them */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->speculative = src->speculative;
	dst_state->curframe = src->curframe;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
		verbose(env, "BPF program is too complex\n");
		goto err;
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL));
	return NULL;
}
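
/* A rough sketch (not verbatim from this file) of how the stack above drives
 * branch exploration: when the main walk hits a conditional jump it pushes the
 * other path and continues on the current one, along the lines of
 *
 *	other_branch = push_stack(env, insn_idx + insn->off + 1, insn_idx, false);
 *	if (!other_branch)
 *		return -EFAULT;
 *
 * and when the current path reaches BPF_EXIT (or a state equivalent to an
 * already verified one), pop_stack() restores the saved verifier state and the
 * walk resumes at the saved insn_idx.
 */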

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(struct bpf_reg_state *reg);

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear id, off, and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;
}

/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}

/* Attempts to improve min/max values based on var_off information */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}

/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_intersect(reg->var_off,
				      tnum_range(reg->umin_value,
						 reg->umax_value));
}
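
/* Worked example for the three helpers above (numbers illustrative): suppose a
 * branch has established 4 <= r <= 7 for an otherwise unknown u64 scalar.
 *   - __reg_bound_offset() intersects var_off with tnum_range(4, 7), giving
 *     {value = 4, mask = 3}: bit 2 is now known set, only bits 0-1 vary.
 *   - __update_reg_bounds() with that var_off keeps umin_value >= 4 and
 *     umax_value <= (value | mask) = 7.
 *   - __reg_deduce_bounds() sees that umax_value is non-negative as an s64,
 *     so the signed bounds can be copied from the unsigned ones: 4 s<= r s<= 7.
 */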

/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(struct bpf_reg_state *reg)
{
	/*
	 * Clear type, id, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_unknown(regs + regno);
}

static void __mark_reg_not_init(struct bpf_reg_state *reg)
{
	__mark_reg_unknown(reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_not_init(regs + regno);
}

static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
		regs[i].parent = NULL;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;

	/* 1st arg to a function */
	regs[BPF_REG_1].type = PTR_TO_CTX;
	mark_reg_known_zero(env, regs, BPF_REG_1);
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	init_reg_state(env, state);
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};

static int cmp_subprogs(const void *a, const void *b)
{
	return ((struct bpf_subprog_info *)a)->start -
	       ((struct bpf_subprog_info *)b)->start;
}

static int find_subprog(struct bpf_verifier_env *env, int off)
{
	struct bpf_subprog_info *p;

	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
		    sizeof(env->subprog_info[0]), cmp_subprogs);
	if (!p)
		return -ENOENT;
	return p - env->subprog_info;

}

static int add_subprog(struct bpf_verifier_env *env, int off)
{
	int insn_cnt = env->prog->len;
	int ret;

	if (off >= insn_cnt || off < 0) {
		verbose(env, "call to invalid destination\n");
		return -EINVAL;
	}
	ret = find_subprog(env, off);
	if (ret >= 0)
		return 0;
	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
		verbose(env, "too many subprograms\n");
		return -E2BIG;
	}
	env->subprog_info[env->subprog_cnt++].start = off;
	sort(env->subprog_info, env->subprog_cnt,
	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
	return 0;
}

static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret < 0)
		return ret;

	/* determine subprog starts. The end is one before the next starts */
	for (i = 0; i < insn_cnt; i++) {
		if (insn[i].code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn[i].src_reg != BPF_PSEUDO_CALL)
			continue;
		if (!env->allow_ptr_leaks) {
			verbose(env, "function calls to other bpf functions are allowed for root only\n");
			return -EPERM;
		}
		ret = add_subprog(env, i + insn[i].imm + 1);
		if (ret < 0)
			return ret;
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level > 1)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		if (BPF_CLASS(code) != BPF_JMP)
			goto next;
		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
			goto next;
		off = i + insn[i].off + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}

/* Parentage chain of this register (or stack slot) should take care of all
 * issues like callee-saved registers, stack slot allocation time, etc.
 */
static int mark_reg_read(struct bpf_verifier_env *env,
			 const struct bpf_reg_state *state,
			 struct bpf_reg_state *parent)
{
	bool writes = parent == state->parent; /* Observe write marks */

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (writes && state->live & REG_LIVE_WRITTEN)
			break;
		if (parent->live & REG_LIVE_DONE) {
			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
				reg_type_str[parent->type],
				parent->var_off.value, parent->off);
			return -EFAULT;
		}
		/* ... then we depend on parent's value */
		parent->live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
		writes = true;
	}
	return 0;
}

static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (regs[regno].type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno != BPF_REG_FP)
			return mark_reg_read(env, &regs[regno],
					     regs[regno].parent);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		regs[regno].live |= REG_LIVE_WRITTEN;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}

static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (type) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_VALUE_OR_NULL:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case CONST_PTR_TO_MAP:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCKET_OR_NULL:
		return true;
	default:
		return false;
	}
}

/* Does this register contain a constant zero? */
static bool register_is_null(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}

/* check_stack_read/write functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 */
static int check_stack_write(struct bpf_verifier_env *env,
			     struct bpf_func_state *state, /* func where register points to */
			     int off, int size, int value_regno, int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	enum bpf_reg_type type;

	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
				 state->acquired_refs, true);
	if (err)
		return err;
	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    state->stack[spi].slot_type[0] == STACK_SPILL &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0 &&
	    is_spillable_regtype((type = cur->regs[value_regno].type))) {

		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}

		if (state != cur && type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}

		/* save register state */
		state->stack[spi].spilled_ptr = cur->regs[value_regno];
		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		for (i = 0; i < BPF_REG_SIZE; i++) {
			if (state->stack[spi].slot_type[i] == STACK_MISC &&
			    !env->allow_ptr_leaks) {
				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
				int soff = (-spi - 1) * BPF_REG_SIZE;

				/* detected reuse of integer stack slot with a pointer
				 * which means either llvm is reusing stack slot or
				 * an attacker is trying to exploit CVE-2018-3639
				 * (speculative store bypass)
				 * Have to sanitize that slot with preemptive
				 * store of zero.
				 */
				if (*poff && *poff != soff) {
					/* disallow programs where single insn stores
					 * into two different stack slots, since verifier
					 * cannot sanitize them
					 */
					verbose(env,
						"insn %d cannot access two stack slots fp%d and fp%d",
						insn_idx, *poff, soff);
					return -EINVAL;
				}
				*poff = soff;
			}
			state->stack[spi].slot_type[i] = STACK_SPILL;
		}
	} else {
		u8 type = STACK_MISC;

		/* regular write of data into stack destroys any spilled ptr */
		state->stack[spi].spilled_ptr.type = NOT_INIT;
		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
		if (state->stack[spi].slot_type[0] == STACK_SPILL)
			for (i = 0; i < BPF_REG_SIZE; i++)
				state->stack[spi].slot_type[i] = STACK_MISC;

		/* only mark the slot as written if all 8 bytes were written
		 * otherwise read propagation may incorrectly stop too soon
		 * when stack slots are partially written.
		 * This heuristic means that read propagation will be
		 * conservative, since it will add reg_live_read marks
		 * to stack slots all the way to first state when programs
		 * writes+reads less than 8 bytes
		 */
		if (size == BPF_REG_SIZE)
			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		/* when we zero initialize stack slots mark them as such */
		if (value_regno >= 0 &&
		    register_is_null(&cur->regs[value_regno]))
			type = STACK_ZERO;

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
				type;
	}
	return 0;
}
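
/* To make the spill path above concrete: a store such as
 *
 *	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),	// *(u64 *)(fp - 8) = r1
 *
 * with R1 holding e.g. PTR_TO_MAP_VALUE reaches this function with off == -8,
 * size == 8 and value_regno == 1; the full register state is copied into
 * stack[spi].spilled_ptr and all BPF_REG_SIZE slot_type bytes become
 * STACK_SPILL. A narrower store of a pointer, or an unprivileged partial
 * overwrite of an existing spill slot, is rejected as shown above.
 */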

static int check_stack_read(struct bpf_verifier_env *env,
			    struct bpf_func_state *reg_state /* func where register points to */,
			    int off, int size, int value_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	u8 *stype;

	if (reg_state->allocated_stack <= slot) {
		verbose(env, "invalid read from stack off %d+0 size %d\n",
			off, size);
		return -EACCES;
	}
	stype = reg_state->stack[spi].slot_type;

	if (stype[0] == STACK_SPILL) {
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		for (i = 1; i < BPF_REG_SIZE; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
				verbose(env, "corrupted spill memory\n");
				return -EACCES;
			}
		}

		if (value_regno >= 0) {
			/* restore register state from stack */
			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 */
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		}
		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
			      reg_state->stack[spi].spilled_ptr.parent);
		return 0;
	} else {
		int zeros = 0;

		for (i = 0; i < size; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
				continue;
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
				zeros++;
				continue;
			}
			verbose(env, "invalid read from stack off %d+%d size %d\n",
				off, i, size);
			return -EACCES;
		}
		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
			      reg_state->stack[spi].spilled_ptr.parent);
		if (value_regno >= 0) {
			if (zeros == size) {
				/* any size read into register is zero extended,
				 * so the whole register == const_zero
				 */
				__mark_reg_const_zero(&state->regs[value_regno]);
			} else {
				/* have read misc data from the stack */
				mark_reg_unknown(env, state->regs, value_regno);
			}
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		}
		return 0;
	}
}

static int check_stack_access(struct bpf_verifier_env *env,
			      const struct bpf_reg_state *reg,
			      int off, int size)
{
	/* Stack accesses must be at a fixed offset, so that we
	 * can determine what type of data were returned. See
	 * check_stack_read().
	 */
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable stack access var_off=%s off=%d size=%d",
			tn_buf, off, size);
		return -EACCES;
	}

	if (off >= 0 || off < -MAX_BPF_STACK) {
		verbose(env, "invalid stack off=%d size=%d\n", off, size);
		return -EACCES;
	}

	return 0;
}

/* check read/write into map element returned by bpf_map_lookup_elem() */
static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
			      int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_map *map = regs[regno].map_ptr;

	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
	    off + size > map->value_size) {
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}
	return 0;
}

/* check read/write into a map element with possible variable offset */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
			    int off, int size, bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register to this map value, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 */
	if (env->log.level)
		print_verifier_state(env, state);

	/* The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * index'es we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0 &&
	    (reg->smin_value == S64_MIN ||
	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
	      reg->smin_value + off < 0)) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->smin_value + off, size,
				 zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the array range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->umax_value + off, size,
				 zero_size_allowed);
	if (err)
		verbose(env, "R%d max value is outside of the array range\n",
			regno);
	return err;
}
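
/* A sketch (C-level pseudo-code, not from this file) of the access pattern the
 * check above expects, for an array map whose value is 16 u32s:
 *
 *	u32 *val = bpf_map_lookup_elem(&map, &key);
 *	if (!val)
 *		return 0;
 *	if (idx >= 16)		// establishes umax_value for 'idx'
 *		return 0;
 *	return val[idx];	// offset now provably within value_size
 *
 * Without the bound on 'idx' its umax_value typically stays at or above
 * BPF_MAX_VAR_OFF and the "unbounded memory access" error above fires; a
 * possibly-negative signed index likewise trips the smin_value check.
 */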

#define MAX_PACKET_OFF 0xffff

static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
				       const struct bpf_call_arg_meta *meta,
				       enum bpf_access_type t)
{
	switch (env->prog->type) {
	/* Program types only with direct read access go here! */
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (t == BPF_WRITE)
			return false;
		/* fallthrough */

	/* Program types with direc