Commit 7c60c48f authored by Eric W. Biederman's avatar Eric W. Biederman

sysctl: Improve the sysctl sanity checks

- Stop validating subdirectories now that we only register leaf tables

- Cleanup and improve the duplicate filename check.
  * Run the duplicate filename check under the sysctl_lock to guarantee
    we never add duplicate names.
  * Reduce the duplicate filename check to nearly O(M*N) where M is the
    number of entries in tthe table we are registering and N is the
    number of entries in the directory before we got there.

- Move the duplicate filename check into it's own function and call
  it directtly from __register_sysctl_table

- Kill the config option as the sanity checks are now cheap enough
  the config option is unnecessary. The original reason for the config
  option was because we had a huge table used to verify the proc filename
  to binary sysctl mapping.  That table has now evolved into the binary_sysctl
  translation layer and is no longer part of the sysctl_check code.

- Tighten up the permission checks.  Guarnateeing that files only have read
  or write permissions.

- Removed redudant check for parents having a procname as now everything has
  a procname.

- Generalize the backtrace logic so that we print a backtrace from
  any failure of __register_sysctl_table that was not caused by
  a memmory allocation failure.  The backtrace allows us to track
  down who erroneously registered a sysctl table.

Bechmark before (CONFIG_SYSCTL_CHECK=y):
    make-dummies 0 999 -> 12s
    rmmod dummy        -> 0.08s

Bechmark before (CONFIG_SYSCTL_CHECK=n):
    make-dummies 0 999 -> 0.7s
    rmmod dummy        -> 0.06s
    make-dummies 0 99999 -> 1m13s
    rmmod dummy          -> 0.38s

Benchmark after:
    make-dummies 0 999 -> 0.65s
    rmmod dummy        -> 0.055s
    make-dummies 0 9999 -> 1m10s
    rmmod dummy         -> 0.39s

The sysctl sanity checks now impose no measurable cost.
Signed-off-by: default avatarEric W. Biederman <>
parent f728019b
......@@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
static int sysctl_depth(struct ctl_table *table)
static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
struct ctl_table *table)
struct ctl_table *tmp;
int depth;
depth = 0;
for (tmp = table; tmp->parent; tmp = tmp->parent)
struct ctl_table *entry, *test;
int error = 0;
return depth;
for (entry = old; entry->procname; entry++) {
for (test = table; test->procname; test++) {
if (strcmp(entry->procname, test->procname) == 0) {
printk(KERN_ERR "sysctl duplicate entry: %s/%s\n",
path, test->procname);
error = -EEXIST;
return error;
static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
static int sysctl_check_dups(struct nsproxy *namespaces,
struct ctl_table_header *header,
const char *path, struct ctl_table *table)
int i;
struct ctl_table_root *root;
struct ctl_table_set *set;
struct ctl_table_header *dir_head, *head;
struct ctl_table *dir_table;
int error = 0;
for (i = 0; table && i < n; i++)
table = table->parent;
/* No dups if we are the only member of our directory */
if (header->attached_by != table)
return 0;
return table;
dir_head = header->parent;
dir_table = header->attached_to;
error = sysctl_check_table_dups(path, dir_table, table);
static void sysctl_print_path(struct ctl_table *table)
struct ctl_table *tmp;
int depth, i;
depth = sysctl_depth(table);
if (table->procname) {
for (i = depth; i >= 0; i--) {
tmp = sysctl_parent(table, i);
printk("/%s", tmp->procname?tmp->procname:"");
printk(" ");
root = &sysctl_table_root;
do {
set = lookup_header_set(root, namespaces);
static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
struct ctl_table *table)
struct ctl_table_header *head;
struct ctl_table *ref, *test;
int depth, cur_depth;
depth = sysctl_depth(table);
for (head = __sysctl_head_next(namespaces, NULL); head;
head = __sysctl_head_next(namespaces, head)) {
cur_depth = depth;
ref = head->ctl_table;
test = sysctl_parent(table, cur_depth);
for (; ref->procname; ref++) {
int match = 0;
if (cur_depth && !ref->child)
list_for_each_entry(head, &set->list, ctl_entry) {
if (head->unregistering)
if (test->procname && ref->procname &&
(strcmp(test->procname, ref->procname) == 0))
if (match) {
if (cur_depth != 0) {
ref = ref->child;
goto repeat;
goto out;
if (head->attached_to != dir_table)
error = sysctl_check_table_dups(path, head->attached_by,
ref = NULL;
return ref;
root = list_entry(root->,
struct ctl_table_root, root_list);
} while (root != &sysctl_table_root);
return error;
static void set_fail(const char **fail, struct ctl_table *table, const char *str)
static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
if (*fail) {
printk(KERN_ERR "sysctl table check failed: ");
printk(" %s\n", *fail);
*fail = str;
struct va_format vaf;
va_list args;
static void sysctl_check_leaf(struct nsproxy *namespaces,
struct ctl_table *table, const char **fail)
struct ctl_table *ref;
va_start(args, fmt);
vaf.fmt = fmt; = &args;
printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
path, table->procname, &vaf);
ref = sysctl_check_lookup(namespaces, table);
if (ref && (ref != table))
set_fail(fail, table, "Sysctl already exists");
return -EINVAL;
static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
static int sysctl_check_table(const char *path, struct ctl_table *table)
int error = 0;
int err = 0;
for (; table->procname; table++) {
const char *fail = NULL;
if (table->parent) {
if (!table->parent->procname)
set_fail(&fail, table, "Parent without procname");
if (table->child) {
if (table->data)
set_fail(&fail, table, "Directory with data?");
if (table->maxlen)
set_fail(&fail, table, "Directory with maxlen?");
if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
set_fail(&fail, table, "Writable sysctl directory");
if (table->proc_handler)
set_fail(&fail, table, "Directory with proc_handler");
if (table->extra1)
set_fail(&fail, table, "Directory with extra1");
if (table->extra2)
set_fail(&fail, table, "Directory with extra2");
} else {
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
(table->proc_handler == proc_dointvec_minmax) ||
(table->proc_handler == proc_dointvec_jiffies) ||
(table->proc_handler == proc_dointvec_userhz_jiffies) ||
(table->proc_handler == proc_dointvec_ms_jiffies) ||
(table->proc_handler == proc_doulongvec_minmax) ||
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
if (!table->data)
set_fail(&fail, table, "No data");
if (!table->maxlen)
set_fail(&fail, table, "No maxlen");
if (!table->proc_handler)
set_fail(&fail, table, "No proc_handler");
sysctl_check_leaf(namespaces, table, &fail);
if (table->mode > 0777)
set_fail(&fail, table, "bogus .mode");
if (fail) {
set_fail(&fail, table, NULL);
error = -EINVAL;
if (table->child)
error |= sysctl_check_table(namespaces, table->child);
err = sysctl_err(path, table, "Not a file");
if ((table->proc_handler == proc_dostring) ||
(table->proc_handler == proc_dointvec) ||
(table->proc_handler == proc_dointvec_minmax) ||
(table->proc_handler == proc_dointvec_jiffies) ||
(table->proc_handler == proc_dointvec_userhz_jiffies) ||
(table->proc_handler == proc_dointvec_ms_jiffies) ||
(table->proc_handler == proc_doulongvec_minmax) ||
(table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
if (!table->data)
err = sysctl_err(path, table, "No data");
if (!table->maxlen)
err = sysctl_err(path, table, "No maxlen");
if (!table->proc_handler)
err = sysctl_err(path, table, "No proc_handler");
if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
err = sysctl_err(path, table, "bogus .mode 0%o",
return error;
return err;
* __register_sysctl_table - register a leaf sysctl table
......@@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table(
header->root = root;
sysctl_set_parent(NULL, header->ctl_table);
header->count = 1;
if (sysctl_check_table(namespaces, header->ctl_table)) {
return NULL;
if (sysctl_check_table(path, table))
goto fail;
header->set = lookup_header_set(root, namespaces);
header->attached_by = header->ctl_table;
......@@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table(
struct ctl_table_root, root_list);
set = lookup_header_set(root, namespaces);
if (sysctl_check_dups(namespaces, header, path, table))
goto fail_locked;
list_add_tail(&header->ctl_entry, &header->set->list);
return header;
return NULL;
static char *append_path(const char *path, char *pos, const char *name)
......@@ -1113,14 +1113,6 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
bool "Sysctl checks"
depends on SYSCTL
sys_sysctl uses binary paths that have been found challenging
to properly maintain and use. This enables checks that help
you to keep things correct.
source mm/Kconfig.debug
source kernel/trace/Kconfig
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment