Commit 0841d986 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2018-06-16

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix a panic in devmap handling in generic XDP where return type
   of __devmap_lookup_elem() got changed recently but generic XDP
   code missed the related update, from Toshiaki.

2) Fix a freeze when BPF progs are loaded that include BPF to BPF
   calls when JIT is enabled where we would later bail out via error
   path w/o dropping kallsyms, and another one to silence syzkaller
   splats from locking prog read-only, from Daniel.

3) Fix a bug in test_offloads.py BPF selftest which must not assume
   that the underlying system have no BPF progs loaded prior to test,
   and one in bpftool to fix accuracy of program load time, from Jakub.

4) Fix a bug in bpftool's probe for availability of the bpf(2)
   BPF_TASK_FD_QUERY subcommand, from Yonghong.

5) Fix a regression in AF_XDP's XDP_SKB receive path where queue
   id check got erroneously removed, from Björn.

6) Fix missing state cleanup in BPF's xfrm tunnel test, from William.

7) Check tunnel type more accurately in BPF's tunnel collect metadata
   kselftest, from Jian.

8) Fix missing Kconfig fragments for BPF kselftests, from Anders.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 35773c93 6d5fc195
......@@ -488,12 +488,15 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
struct xdp_buff;
struct sk_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
......@@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return 0;
}
struct sk_buff;
static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
return 0;
}
static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
......
......@@ -19,6 +19,7 @@
#include <linux/cryptohash.h>
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
#include <linux/if_vlan.h>
#include <net/sch_generic.h>
......@@ -469,7 +470,8 @@ struct sock_fprog_kern {
};
struct bpf_binary_header {
unsigned int pages;
u16 pages;
u16 locked:1;
u8 image[];
};
......@@ -671,15 +673,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
{
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
fp->locked = 1;
WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
if (set_memory_ro((unsigned long)fp, fp->pages))
fp->locked = 0;
#endif
}
static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
{
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
if (fp->locked) {
WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
/* In case set_memory_rw() fails, we want to be the first
......@@ -687,34 +692,30 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
*/
fp->locked = 0;
}
#endif
}
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
{
WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
}
static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
{
WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
}
#else
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
{
}
static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
{
}
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
{
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
hdr->locked = 1;
if (set_memory_ro((unsigned long)hdr, hdr->pages))
hdr->locked = 0;
#endif
}
static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
{
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
if (hdr->locked) {
WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
/* In case set_memory_rw() fails, we want to be the first
* to crash here instead of some random place later on.
*/
hdr->locked = 0;
}
#endif
}
#endif /* CONFIG_ARCH_HAS_SET_MEMORY */
static inline struct bpf_binary_header *
bpf_jit_binary_hdr(const struct bpf_prog *fp)
......@@ -725,6 +726,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
return (void *)addr;
}
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
{
if (!fp->locked)
return -ENOLCK;
if (fp->jited) {
const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
if (!hdr->locked)
return -ENOLCK;
}
return 0;
}
#endif
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{
......@@ -786,6 +803,21 @@ static inline bool bpf_dump_raw_ok(void)
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
struct net_device *fwd)
{
unsigned int len;
if (unlikely(!(fwd->flags & IFF_UP)))
return -ENETDOWN;
len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
if (skb->len > len)
return -EMSGSIZE;
return 0;
}
/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
* same cpu context. Further for best results no more than a single map
* for the do_redirect/do_flush pair should be used. This limitation is
......@@ -961,6 +993,9 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
}
#endif /* CONFIG_BPF_JIT */
void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
#define BPF_ANC BIT(15)
static inline bool bpf_needs_clear_a(const struct sock_filter *first)
......
......@@ -350,6 +350,20 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
return prog_adj;
}
void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
{
int i;
for (i = 0; i < fp->aux->func_cnt; i++)
bpf_prog_kallsyms_del(fp->aux->func[i]);
}
void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
{
bpf_prog_kallsyms_del_subprogs(fp);
bpf_prog_kallsyms_del(fp);
}
#ifdef CONFIG_BPF_JIT
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
......@@ -584,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
bpf_fill_ill_insns(hdr, size);
hdr->pages = size / PAGE_SIZE;
hdr->locked = 0;
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1);
......@@ -1434,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
return 0;
}
static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
{
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
int i, err;
for (i = 0; i < fp->aux->func_cnt; i++) {
err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
if (err)
return err;
}
return bpf_prog_check_pages_ro_single(fp);
#endif
return 0;
}
static void bpf_prog_select_func(struct bpf_prog *fp)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
#else
fp->bpf_func = __bpf_prog_ret0_warn;
#endif
}
/**
* bpf_prog_select_runtime - select exec runtime for BPF program
* @fp: bpf_prog populated with internal BPF program
......@@ -1444,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
*/
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
/* In case of BPF to BPF calls, verifier did all the prep
* work with regards to JITing, etc.
*/
if (fp->bpf_func)
goto finalize;
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
#else
fp->bpf_func = __bpf_prog_ret0_warn;
#endif
bpf_prog_select_func(fp);
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
......@@ -1471,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
if (*err)
return fp;
}
finalize:
bpf_prog_lock_ro(fp);
/* The tail call compatibility check can only be done at
......@@ -1479,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
* all eBPF JITs might immediately support all features.
*/
*err = bpf_check_tail_call(fp);
if (*err)
return fp;
/* Checkpoint: at this point onwards any cBPF -> eBPF or
* native eBPF program is read-only. If we failed to change
* the page attributes (e.g. allocation failure from
* splitting large pages), then reject the whole program
* in order to guarantee not ending up with any W+X pages
* from BPF side in kernel.
*/
*err = bpf_prog_check_pages_ro_locked(fp);
return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
......
......@@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return bq_enqueue(dst, xdpf, dev_rx);
}
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
int err;
err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
if (unlikely(err))
return err;
skb->dev = dst->dev;
generic_xdp_tx(skb, xdp_prog);
return 0;
}
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
......
......@@ -1034,14 +1034,9 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
int i;
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
for (i = 0; i < prog->aux->func_cnt; i++)
bpf_prog_kallsyms_del(prog->aux->func[i]);
bpf_prog_kallsyms_del(prog);
bpf_prog_kallsyms_del_all(prog);
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
......@@ -1358,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0)
goto free_used_maps;
/* eBPF program is ready to be JITed */
if (!prog->bpf_func)
prog = bpf_prog_select_runtime(prog, &err);
prog = bpf_prog_select_runtime(prog, &err);
if (err < 0)
goto free_used_maps;
......@@ -1384,6 +1377,7 @@ static int bpf_prog_load(union bpf_attr *attr)
return err;
free_used_maps:
bpf_prog_kallsyms_del_subprogs(prog);
free_used_maps(prog->aux);
free_prog:
bpf_prog_uncharge_memlock(prog);
......
......@@ -3214,20 +3214,6 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
{
unsigned int len;
if (unlikely(!(fwd->flags & IFF_UP)))
return -ENETDOWN;
len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
if (skb->len > len)
return -EMSGSIZE;
return 0;
}
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
......@@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
}
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
struct bpf_dtab_netdev *dst = fwd;
err = dev_map_generic_redirect(dst, skb, xdp_prog);
if (unlikely(err))
goto err;
skb->dev = fwd;
generic_xdp_tx(skb, xdp_prog);
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
struct xdp_sock *xs = fwd;
......
......@@ -118,6 +118,9 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
u64 addr;
int err;
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
if (!xskq_peek_addr(xs->umem->fq, &addr) ||
len > xs->umem->chunk_size_nohr) {
xs->rx_dropped++;
......
......@@ -29,9 +29,10 @@ static bool has_perf_query_support(void)
if (perf_query_supported)
goto out;
fd = open(bin_name, O_RDONLY);
fd = open("/", O_RDONLY);
if (fd < 0) {
p_err("perf_query_support: %s", strerror(errno));
p_err("perf_query_support: cannot open directory \"/\" (%s)",
strerror(errno));
goto out;
}
......
......@@ -90,7 +90,9 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
}
wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
nsecs / 1000000000;
(real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) /
1000000000;
if (!localtime_r(&wallclock_secs, &load_tm)) {
snprintf(buf, size, "%llu", nsecs / 1000000000);
......
......@@ -7,3 +7,13 @@ CONFIG_CGROUP_BPF=y
CONFIG_NETDEVSIM=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_SCH_INGRESS=y
CONFIG_NET_IPIP=y
CONFIG_IPV6=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NET_IPGRE=y
CONFIG_IPV6_GRE=y
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_HMAC=m
CONFIG_CRYPTO_SHA256=m
CONFIG_VXLAN=y
CONFIG_GENEVE=y
......@@ -163,6 +163,10 @@ def bpftool(args, JSON=True, ns="", fail=True):
def bpftool_prog_list(expected=None, ns=""):
_, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
# Remove the base progs
for p in base_progs:
if p in progs:
progs.remove(p)
if expected is not None:
if len(progs) != expected:
fail(True, "%d BPF programs loaded, expected %d" %
......@@ -171,6 +175,10 @@ def bpftool_prog_list(expected=None, ns=""):
def bpftool_map_list(expected=None, ns=""):
_, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
# Remove the base maps
for m in base_maps:
if m in maps:
maps.remove(m)
if expected is not None:
if len(maps) != expected:
fail(True, "%d BPF maps loaded, expected %d" %
......@@ -585,8 +593,8 @@ skip(os.getuid() != 0, "test must be run as root")
# Check tools
ret, progs = bpftool("prog", fail=False)
skip(ret != 0, "bpftool not installed")
# Check no BPF programs are loaded
skip(len(progs) != 0, "BPF programs already loaded on the system")
base_progs = progs
_, base_maps = bpftool("map")
# Check netdevsim
ret, out = cmd("modprobe netdevsim", fail=False)
......
......@@ -608,28 +608,26 @@ setup_xfrm_tunnel()
test_xfrm_tunnel()
{
config_device
#tcpdump -nei veth1 ip &
output=$(mktemp)
cat /sys/kernel/debug/tracing/trace_pipe | tee $output &
setup_xfrm_tunnel
> /sys/kernel/debug/tracing/trace
setup_xfrm_tunnel
tc qdisc add dev veth1 clsact
tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
sec xfrm_get_state
ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
sleep 1
grep "reqid 1" $output
grep "reqid 1" /sys/kernel/debug/tracing/trace
check_err $?
grep "spi 0x1" $output
grep "spi 0x1" /sys/kernel/debug/tracing/trace
check_err $?
grep "remote ip 0xac100164" $output
grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
check_err $?
cleanup
if [ $ret -ne 0 ]; then
echo -e ${RED}"FAIL: xfrm tunnel"${NC}
return 1
fi
echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
echo -e ${RED}"FAIL: xfrm tunnel"${NC}
return 1
fi
echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
}
attach_bpf()
......@@ -657,6 +655,10 @@ cleanup()
ip link del ip6geneve11 2> /dev/null
ip link del erspan11 2> /dev/null
ip link del ip6erspan11 2> /dev/null
ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
}
cleanup_exit()
......@@ -668,7 +670,7 @@ cleanup_exit()
check()
{
ip link help $1 2>&1 | grep -q "^Usage:"
ip link help 2>&1 | grep -q "\s$1\s"
if [ $? -ne 0 ];then
echo "SKIP $1: iproute2 not support"
cleanup
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment