blob: d9c3ae6afe4c78bf1886fbeb371ca3d35098e950 [file] [log] [blame]
/*
 * /proc/sys support
 */
Alexey Dobriyan1e0edd32008-10-17 05:07:44 +04004#include <linux/init.h>
Eric W. Biederman77b14db2007-02-14 00:34:12 -08005#include <linux/sysctl.h>
Lucas De Marchif1ecf062011-11-02 13:39:22 -07006#include <linux/poll.h>
Eric W. Biederman77b14db2007-02-14 00:34:12 -08007#include <linux/proc_fs.h>
8#include <linux/security.h>
Nick Piggin34286d62011-01-07 17:49:57 +11009#include <linux/namei.h>
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -080010#include <linux/module.h>
Eric W. Biederman77b14db2007-02-14 00:34:12 -080011#include "internal.h"
12
Al Virod72f71e2009-02-20 05:58:47 +000013static const struct dentry_operations proc_sys_dentry_operations;
Eric W. Biederman77b14db2007-02-14 00:34:12 -080014static const struct file_operations proc_sys_file_operations;
Jan Engelhardt03a44822008-02-08 04:21:19 -080015static const struct inode_operations proc_sys_inode_operations;
Al Viro9043476f2008-07-15 08:54:06 -040016static const struct file_operations proc_sys_dir_file_operations;
17static const struct inode_operations proc_sys_dir_operations;
Eric W. Biederman77b14db2007-02-14 00:34:12 -080018
Lucas De Marchif1ecf062011-11-02 13:39:22 -070019void proc_sys_poll_notify(struct ctl_table_poll *poll)
20{
21 if (!poll)
22 return;
23
24 atomic_inc(&poll->event);
25 wake_up_interruptible(&poll->wait);
26}
27
Eric W. Biedermana1945582012-01-21 17:51:48 -080028static struct ctl_table root_table[] = {
29 {
30 .procname = "",
31 .mode = S_IRUGO|S_IXUGO,
32 .child = &root_table[1],
33 },
34 { }
35};
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -080036static struct ctl_table_root sysctl_table_root;
37static struct ctl_table_header root_table_header = {
38 {{.count = 1,
Eric W. Biederman938aaa42012-01-09 17:24:30 -080039 .nreg = 1,
40 .ctl_table = root_table,
41 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -080042 .root = &sysctl_table_root,
43 .set = &sysctl_table_root.default_set,
44};
45static struct ctl_table_root sysctl_table_root = {
46 .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
47 .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
48};
49
50static DEFINE_SPINLOCK(sysctl_lock);
51
/*
 * Compare two length-delimited names: bytewise over the common prefix
 * first, then by length.  Returns <0, 0 or >0.
 */
static int namecmp(const char *name1, int len1, const char *name2, int len2)
{
	int common = (len1 > len2) ? len2 : len1;
	int diff = memcmp(name1, name2, common);

	return diff ? diff : len1 - len2;
}
66
67static struct ctl_table *find_entry(struct ctl_table_header **phead,
68 struct ctl_table_set *set,
69 struct ctl_table_header *dir_head, struct ctl_table *dir,
70 const char *name, int namelen)
71{
72 struct ctl_table_header *head;
73 struct ctl_table *entry;
74
75 if (dir_head->set == set) {
76 for (entry = dir; entry->procname; entry++) {
77 const char *procname = entry->procname;
78 if (namecmp(procname, strlen(procname), name, namelen) == 0) {
79 *phead = dir_head;
80 return entry;
81 }
82 }
83 }
84
85 list_for_each_entry(head, &set->list, ctl_entry) {
86 if (head->unregistering)
87 continue;
88 if (head->attached_to != dir)
89 continue;
90 for (entry = head->attached_by; entry->procname; entry++) {
91 const char *procname = entry->procname;
92 if (namecmp(procname, strlen(procname), name, namelen) == 0) {
93 *phead = head;
94 return entry;
95 }
96 }
97 }
98 return NULL;
99}
100
Eric W. Biedermane0d04522012-01-09 22:36:41 -0800101static void init_header(struct ctl_table_header *head,
102 struct ctl_table_root *root, struct ctl_table_set *set,
103 struct ctl_table *table)
104{
105 head->ctl_table_arg = table;
106 INIT_LIST_HEAD(&head->ctl_entry);
107 head->used = 0;
108 head->count = 1;
109 head->nreg = 1;
110 head->unregistering = NULL;
111 head->root = root;
112 head->set = set;
113 head->parent = NULL;
114}
115
Eric W. Biederman8425d6a2012-01-09 17:35:01 -0800116static void erase_header(struct ctl_table_header *head)
117{
118 list_del_init(&head->ctl_entry);
119}
120
121static void insert_header(struct ctl_table_header *header)
122{
123 header->parent->count++;
124 list_add_tail(&header->ctl_entry, &header->set->list);
125}
126
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800127/* called under sysctl_lock */
128static int use_table(struct ctl_table_header *p)
129{
130 if (unlikely(p->unregistering))
131 return 0;
132 p->used++;
133 return 1;
134}
135
136/* called under sysctl_lock */
137static void unuse_table(struct ctl_table_header *p)
138{
139 if (!--p->used)
140 if (unlikely(p->unregistering))
141 complete(p->unregistering);
142}
143
144/* called under sysctl_lock, will reacquire if has to wait */
145static void start_unregistering(struct ctl_table_header *p)
146{
147 /*
148 * if p->used is 0, nobody will ever touch that entry again;
149 * we'll eliminate all paths to it before dropping sysctl_lock
150 */
151 if (unlikely(p->used)) {
152 struct completion wait;
153 init_completion(&wait);
154 p->unregistering = &wait;
155 spin_unlock(&sysctl_lock);
156 wait_for_completion(&wait);
157 spin_lock(&sysctl_lock);
158 } else {
159 /* anything non-NULL; we'll never dereference it */
160 p->unregistering = ERR_PTR(-EINVAL);
161 }
162 /*
163 * do not remove from the list until nobody holds it; walking the
164 * list in do_sysctl() relies on that.
165 */
Eric W. Biederman8425d6a2012-01-09 17:35:01 -0800166 erase_header(p);
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800167}
168
169static void sysctl_head_get(struct ctl_table_header *head)
170{
171 spin_lock(&sysctl_lock);
172 head->count++;
173 spin_unlock(&sysctl_lock);
174}
175
176void sysctl_head_put(struct ctl_table_header *head)
177{
178 spin_lock(&sysctl_lock);
179 if (!--head->count)
180 kfree_rcu(head, rcu);
181 spin_unlock(&sysctl_lock);
182}
183
184static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
185{
186 if (!head)
187 BUG();
188 spin_lock(&sysctl_lock);
189 if (!use_table(head))
190 head = ERR_PTR(-ENOENT);
191 spin_unlock(&sysctl_lock);
192 return head;
193}
194
195static void sysctl_head_finish(struct ctl_table_header *head)
196{
197 if (!head)
198 return;
199 spin_lock(&sysctl_lock);
200 unuse_table(head);
201 spin_unlock(&sysctl_lock);
202}
203
204static struct ctl_table_set *
205lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
206{
207 struct ctl_table_set *set = &root->default_set;
208 if (root->lookup)
209 set = root->lookup(root, namespaces);
210 return set;
211}
212
213static struct list_head *
214lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
215{
216 struct ctl_table_set *set = lookup_header_set(root, namespaces);
217 return &set->list;
218}
219
Eric W. Biederman076c3ee2012-01-09 21:42:02 -0800220static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
221 struct ctl_table_header *dir_head,
222 struct ctl_table *dir,
223 const char *name, int namelen)
224{
225 struct ctl_table_header *head;
226 struct ctl_table *entry;
227 struct ctl_table_root *root;
228 struct ctl_table_set *set;
229
230 spin_lock(&sysctl_lock);
231 root = &sysctl_table_root;
232 do {
233 set = lookup_header_set(root, current->nsproxy);
234 entry = find_entry(&head, set, dir_head, dir, name, namelen);
235 if (entry && use_table(head))
236 *phead = head;
237 else
238 entry = NULL;
239 root = list_entry(root->root_list.next,
240 struct ctl_table_root, root_list);
241 } while (!entry && root != &sysctl_table_root);
242 spin_unlock(&sysctl_lock);
243 return entry;
244}
245
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800246static struct ctl_table_header *next_usable_entry(struct ctl_table *dir,
247 struct ctl_table_root *root, struct list_head *tmp)
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800248{
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800249 struct nsproxy *namespaces = current->nsproxy;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800250 struct list_head *header_list;
251 struct ctl_table_header *head;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800252
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800253 goto next;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800254 for (;;) {
255 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800256 root = head->root;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800257
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800258 if (head->attached_to != dir ||
259 !head->attached_by->procname ||
260 !use_table(head))
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800261 goto next;
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800262
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800263 return head;
264 next:
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800265 tmp = tmp->next;
266 header_list = lookup_header_list(root, namespaces);
267 if (tmp != header_list)
268 continue;
269
270 do {
271 root = list_entry(root->root_list.next,
272 struct ctl_table_root, root_list);
273 if (root == &sysctl_table_root)
274 goto out;
275 header_list = lookup_header_list(root, namespaces);
276 } while (list_empty(header_list));
277 tmp = header_list->next;
278 }
279out:
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800280 return NULL;
281}
282
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800283static void first_entry(
284 struct ctl_table_header *dir_head, struct ctl_table *dir,
285 struct ctl_table_header **phead, struct ctl_table **pentry)
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800286{
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800287 struct ctl_table_header *head = dir_head;
288 struct ctl_table *entry = dir;
289
290 spin_lock(&sysctl_lock);
291 if (entry->procname) {
292 use_table(head);
293 } else {
294 head = next_usable_entry(dir, &sysctl_table_root,
295 &sysctl_table_root.default_set.list);
296 if (head)
297 entry = head->attached_by;
298 }
299 spin_unlock(&sysctl_lock);
300 *phead = head;
301 *pentry = entry;
302}
303
304static void next_entry(struct ctl_table *dir,
305 struct ctl_table_header **phead, struct ctl_table **pentry)
306{
307 struct ctl_table_header *head = *phead;
308 struct ctl_table *entry = *pentry;
309
310 entry++;
311 if (!entry->procname) {
312 struct ctl_table_root *root = head->root;
313 struct list_head *tmp = &head->ctl_entry;
314 if (head->attached_to != dir) {
315 root = &sysctl_table_root;
316 tmp = &sysctl_table_root.default_set.list;
317 }
318 spin_lock(&sysctl_lock);
319 unuse_table(head);
320 head = next_usable_entry(dir, root, tmp);
321 spin_unlock(&sysctl_lock);
322 if (head)
323 entry = head->attached_by;
324 }
325 *phead = head;
326 *pentry = entry;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800327}
328
329void register_sysctl_root(struct ctl_table_root *root)
330{
331 spin_lock(&sysctl_lock);
332 list_add_tail(&root->root_list, &sysctl_table_root.root_list);
333 spin_unlock(&sysctl_lock);
334}
335
336/*
337 * sysctl_perm does NOT grant the superuser all rights automatically, because
338 * some sysctl variables are readonly even to root.
339 */
340
341static int test_perm(int mode, int op)
342{
343 if (!current_euid())
344 mode >>= 6;
345 else if (in_egroup_p(0))
346 mode >>= 3;
347 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
348 return 0;
349 return -EACCES;
350}
351
352static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
353{
354 int mode;
355
356 if (root->permissions)
357 mode = root->permissions(root, current->nsproxy, table);
358 else
359 mode = table->mode;
360
361 return test_perm(mode, op);
362}
363
Al Viro9043476f2008-07-15 08:54:06 -0400364static struct inode *proc_sys_make_inode(struct super_block *sb,
365 struct ctl_table_header *head, struct ctl_table *table)
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800366{
367 struct inode *inode;
Al Viro9043476f2008-07-15 08:54:06 -0400368 struct proc_inode *ei;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800369
Al Viro9043476f2008-07-15 08:54:06 -0400370 inode = new_inode(sb);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800371 if (!inode)
372 goto out;
373
Christoph Hellwig85fe4022010-10-23 11:19:54 -0400374 inode->i_ino = get_next_ino();
375
Al Viro9043476f2008-07-15 08:54:06 -0400376 sysctl_head_get(head);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800377 ei = PROC_I(inode);
Al Viro9043476f2008-07-15 08:54:06 -0400378 ei->sysctl = head;
379 ei->sysctl_entry = table;
380
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800381 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
Al Viro9043476f2008-07-15 08:54:06 -0400382 inode->i_mode = table->mode;
383 if (!table->child) {
384 inode->i_mode |= S_IFREG;
385 inode->i_op = &proc_sys_inode_operations;
386 inode->i_fop = &proc_sys_file_operations;
387 } else {
388 inode->i_mode |= S_IFDIR;
Al Viro9043476f2008-07-15 08:54:06 -0400389 inode->i_op = &proc_sys_dir_operations;
390 inode->i_fop = &proc_sys_dir_file_operations;
391 }
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800392out:
393 return inode;
394}
395
Adrian Bunk81324362008-10-03 00:33:54 +0400396static struct ctl_table_header *grab_header(struct inode *inode)
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800397{
Eric W. Biederman3cc3e042012-01-07 06:57:47 -0800398 struct ctl_table_header *head = PROC_I(inode)->sysctl;
399 if (!head)
400 head = &root_table_header;
401 return sysctl_head_grab(head);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800402}
403
404static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
405 struct nameidata *nd)
406{
Al Viro9043476f2008-07-15 08:54:06 -0400407 struct ctl_table_header *head = grab_header(dir);
408 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
409 struct ctl_table_header *h = NULL;
410 struct qstr *name = &dentry->d_name;
411 struct ctl_table *p;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800412 struct inode *inode;
Al Viro9043476f2008-07-15 08:54:06 -0400413 struct dentry *err = ERR_PTR(-ENOENT);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800414
Al Viro9043476f2008-07-15 08:54:06 -0400415 if (IS_ERR(head))
416 return ERR_CAST(head);
417
418 if (table && !table->child) {
419 WARN_ON(1);
420 goto out;
421 }
422
Eric W. Biedermana1945582012-01-21 17:51:48 -0800423 table = table ? table->child : &head->ctl_table[1];
Al Viro9043476f2008-07-15 08:54:06 -0400424
Eric W. Biederman076c3ee2012-01-09 21:42:02 -0800425 p = lookup_entry(&h, head, table, name->name, name->len);
Al Viro9043476f2008-07-15 08:54:06 -0400426 if (!p)
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800427 goto out;
428
429 err = ERR_PTR(-ENOMEM);
Al Viro9043476f2008-07-15 08:54:06 -0400430 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
431 if (h)
432 sysctl_head_finish(h);
433
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800434 if (!inode)
435 goto out;
436
437 err = NULL;
Nick Pigginfb045ad2011-01-07 17:49:55 +1100438 d_set_d_op(dentry, &proc_sys_dentry_operations);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800439 d_add(dentry, inode);
440
441out:
442 sysctl_head_finish(head);
443 return err;
444}
445
Pavel Emelyanov7708bfb2008-04-29 01:02:40 -0700446static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
447 size_t count, loff_t *ppos, int write)
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800448{
Al Viro9043476f2008-07-15 08:54:06 -0400449 struct inode *inode = filp->f_path.dentry->d_inode;
450 struct ctl_table_header *head = grab_header(inode);
451 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
David Howells2a2da532007-10-25 15:27:40 +0100452 ssize_t error;
453 size_t res;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800454
Al Viro9043476f2008-07-15 08:54:06 -0400455 if (IS_ERR(head))
456 return PTR_ERR(head);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800457
458 /*
459 * At this point we know that the sysctl was not unregistered
460 * and won't be until we finish.
461 */
462 error = -EPERM;
Pavel Emelyanovd7321cd2008-04-29 01:02:44 -0700463 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800464 goto out;
465
Al Viro9043476f2008-07-15 08:54:06 -0400466 /* if that can happen at all, it should be -EINVAL, not -EISDIR */
467 error = -EINVAL;
468 if (!table->proc_handler)
469 goto out;
470
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800471 /* careful: calling conventions are nasty here */
472 res = count;
Alexey Dobriyan8d65af72009-09-23 15:57:19 -0700473 error = table->proc_handler(table, write, buf, &res, ppos);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800474 if (!error)
475 error = res;
476out:
477 sysctl_head_finish(head);
478
479 return error;
480}
481
Pavel Emelyanov7708bfb2008-04-29 01:02:40 -0700482static ssize_t proc_sys_read(struct file *filp, char __user *buf,
483 size_t count, loff_t *ppos)
484{
485 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
486}
487
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800488static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
489 size_t count, loff_t *ppos)
490{
Pavel Emelyanov7708bfb2008-04-29 01:02:40 -0700491 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800492}
493
Lucas De Marchif1ecf062011-11-02 13:39:22 -0700494static int proc_sys_open(struct inode *inode, struct file *filp)
495{
496 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
497
498 if (table->poll)
499 filp->private_data = proc_sys_poll_event(table->poll);
500
501 return 0;
502}
503
504static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
505{
506 struct inode *inode = filp->f_path.dentry->d_inode;
507 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
508 unsigned long event = (unsigned long)filp->private_data;
509 unsigned int ret = DEFAULT_POLLMASK;
510
511 if (!table->proc_handler)
512 goto out;
513
514 if (!table->poll)
515 goto out;
516
517 poll_wait(filp, &table->poll->wait, wait);
518
519 if (event != atomic_read(&table->poll->event)) {
520 filp->private_data = proc_sys_poll_event(table->poll);
521 ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
522 }
523
524out:
525 return ret;
526}
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800527
528static int proc_sys_fill_cache(struct file *filp, void *dirent,
Al Viro9043476f2008-07-15 08:54:06 -0400529 filldir_t filldir,
530 struct ctl_table_header *head,
531 struct ctl_table *table)
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800532{
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800533 struct dentry *child, *dir = filp->f_path.dentry;
534 struct inode *inode;
535 struct qstr qname;
536 ino_t ino = 0;
537 unsigned type = DT_UNKNOWN;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800538
539 qname.name = table->procname;
540 qname.len = strlen(table->procname);
541 qname.hash = full_name_hash(qname.name, qname.len);
542
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800543 child = d_lookup(dir, &qname);
544 if (!child) {
Al Viro9043476f2008-07-15 08:54:06 -0400545 child = d_alloc(dir, &qname);
546 if (child) {
547 inode = proc_sys_make_inode(dir->d_sb, head, table);
548 if (!inode) {
549 dput(child);
550 return -ENOMEM;
551 } else {
Nick Pigginfb045ad2011-01-07 17:49:55 +1100552 d_set_d_op(child, &proc_sys_dentry_operations);
Al Viro9043476f2008-07-15 08:54:06 -0400553 d_add(child, inode);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800554 }
Al Viro9043476f2008-07-15 08:54:06 -0400555 } else {
556 return -ENOMEM;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800557 }
558 }
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800559 inode = child->d_inode;
Al Viro9043476f2008-07-15 08:54:06 -0400560 ino = inode->i_ino;
561 type = inode->i_mode >> 12;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800562 dput(child);
Al Viro9043476f2008-07-15 08:54:06 -0400563 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
564}
565
566static int scan(struct ctl_table_header *head, ctl_table *table,
567 unsigned long *pos, struct file *file,
568 void *dirent, filldir_t filldir)
569{
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800570 int res;
Al Viro9043476f2008-07-15 08:54:06 -0400571
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800572 if ((*pos)++ < file->f_pos)
573 return 0;
Al Viro9043476f2008-07-15 08:54:06 -0400574
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800575 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
Al Viro9043476f2008-07-15 08:54:06 -0400576
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800577 if (res == 0)
578 file->f_pos = *pos;
Al Viro9043476f2008-07-15 08:54:06 -0400579
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800580 return res;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800581}
582
583static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
584{
Al Viro9043476f2008-07-15 08:54:06 -0400585 struct dentry *dentry = filp->f_path.dentry;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800586 struct inode *inode = dentry->d_inode;
Al Viro9043476f2008-07-15 08:54:06 -0400587 struct ctl_table_header *head = grab_header(inode);
588 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
589 struct ctl_table_header *h = NULL;
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800590 struct ctl_table *entry;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800591 unsigned long pos;
Al Viro9043476f2008-07-15 08:54:06 -0400592 int ret = -EINVAL;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800593
Al Viro9043476f2008-07-15 08:54:06 -0400594 if (IS_ERR(head))
595 return PTR_ERR(head);
596
597 if (table && !table->child) {
598 WARN_ON(1);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800599 goto out;
Al Viro9043476f2008-07-15 08:54:06 -0400600 }
601
Eric W. Biedermana1945582012-01-21 17:51:48 -0800602 table = table ? table->child : &head->ctl_table[1];
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800603
604 ret = 0;
605 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
606 if (filp->f_pos == 0) {
607 if (filldir(dirent, ".", 1, filp->f_pos,
608 inode->i_ino, DT_DIR) < 0)
609 goto out;
610 filp->f_pos++;
611 }
612 if (filp->f_pos == 1) {
613 if (filldir(dirent, "..", 2, filp->f_pos,
614 parent_ino(dentry), DT_DIR) < 0)
615 goto out;
616 filp->f_pos++;
617 }
618 pos = 2;
619
Eric W. Biederman6a75ce12012-01-18 03:15:51 -0800620 for (first_entry(head, table, &h, &entry); h; next_entry(table, &h, &entry)) {
621 ret = scan(h, entry, &pos, filp, dirent, filldir);
Al Viro9043476f2008-07-15 08:54:06 -0400622 if (ret) {
623 sysctl_head_finish(h);
624 break;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800625 }
626 }
627 ret = 1;
628out:
629 sysctl_head_finish(head);
630 return ret;
631}
632
Al Viro10556cb2011-06-20 19:28:19 -0400633static int proc_sys_permission(struct inode *inode, int mask)
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800634{
635 /*
636 * sysctl entries that are not writeable,
637 * are _NOT_ writeable, capabilities or not.
638 */
Miklos Szeredif696a362008-07-31 13:41:58 +0200639 struct ctl_table_header *head;
640 struct ctl_table *table;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800641 int error;
642
Miklos Szeredif696a362008-07-31 13:41:58 +0200643 /* Executable files are not allowed under /proc/sys/ */
644 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
645 return -EACCES;
646
647 head = grab_header(inode);
Al Viro9043476f2008-07-15 08:54:06 -0400648 if (IS_ERR(head))
649 return PTR_ERR(head);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800650
Miklos Szeredif696a362008-07-31 13:41:58 +0200651 table = PROC_I(inode)->sysctl_entry;
Al Viro9043476f2008-07-15 08:54:06 -0400652 if (!table) /* global root - r-xr-xr-x */
653 error = mask & MAY_WRITE ? -EACCES : 0;
654 else /* Use the permissions on the sysctl table entry */
Al Viro1fc0f782011-06-20 18:59:02 -0400655 error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800656
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800657 sysctl_head_finish(head);
658 return error;
659}
660
661static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
662{
663 struct inode *inode = dentry->d_inode;
664 int error;
665
666 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
667 return -EPERM;
668
669 error = inode_change_ok(inode, attr);
Christoph Hellwig10257742010-06-04 11:30:02 +0200670 if (error)
671 return error;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800672
Christoph Hellwig10257742010-06-04 11:30:02 +0200673 if ((attr->ia_valid & ATTR_SIZE) &&
674 attr->ia_size != i_size_read(inode)) {
675 error = vmtruncate(inode, attr->ia_size);
676 if (error)
677 return error;
678 }
679
680 setattr_copy(inode, attr);
681 mark_inode_dirty(inode);
682 return 0;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800683}
684
Al Viro9043476f2008-07-15 08:54:06 -0400685static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
686{
687 struct inode *inode = dentry->d_inode;
688 struct ctl_table_header *head = grab_header(inode);
689 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
690
691 if (IS_ERR(head))
692 return PTR_ERR(head);
693
694 generic_fillattr(inode, stat);
695 if (table)
696 stat->mode = (stat->mode & S_IFMT) | table->mode;
697
698 sysctl_head_finish(head);
699 return 0;
700}
701
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800702static const struct file_operations proc_sys_file_operations = {
Lucas De Marchif1ecf062011-11-02 13:39:22 -0700703 .open = proc_sys_open,
704 .poll = proc_sys_poll,
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800705 .read = proc_sys_read,
706 .write = proc_sys_write,
Arnd Bergmann6038f372010-08-15 18:52:59 +0200707 .llseek = default_llseek,
Al Viro9043476f2008-07-15 08:54:06 -0400708};
709
710static const struct file_operations proc_sys_dir_file_operations = {
Pavel Emelyanov887df072011-11-02 13:38:42 -0700711 .read = generic_read_dir,
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800712 .readdir = proc_sys_readdir,
Christoph Hellwig3222a3e2008-09-03 21:53:01 +0200713 .llseek = generic_file_llseek,
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800714};
715
Jan Engelhardt03a44822008-02-08 04:21:19 -0800716static const struct inode_operations proc_sys_inode_operations = {
Al Viro9043476f2008-07-15 08:54:06 -0400717 .permission = proc_sys_permission,
718 .setattr = proc_sys_setattr,
719 .getattr = proc_sys_getattr,
720};
721
722static const struct inode_operations proc_sys_dir_operations = {
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800723 .lookup = proc_sys_lookup,
724 .permission = proc_sys_permission,
725 .setattr = proc_sys_setattr,
Al Viro9043476f2008-07-15 08:54:06 -0400726 .getattr = proc_sys_getattr,
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800727};
728
729static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
730{
Nick Piggin34286d62011-01-07 17:49:57 +1100731 if (nd->flags & LOOKUP_RCU)
732 return -ECHILD;
Al Viro9043476f2008-07-15 08:54:06 -0400733 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
734}
735
Nick Pigginfe15ce42011-01-07 17:49:23 +1100736static int proc_sys_delete(const struct dentry *dentry)
Al Viro9043476f2008-07-15 08:54:06 -0400737{
738 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
739}
740
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800741static int sysctl_is_seen(struct ctl_table_header *p)
742{
743 struct ctl_table_set *set = p->set;
744 int res;
745 spin_lock(&sysctl_lock);
746 if (p->unregistering)
747 res = 0;
748 else if (!set->is_seen)
749 res = 1;
750 else
751 res = set->is_seen(set);
752 spin_unlock(&sysctl_lock);
753 return res;
754}
755
Nick Piggin621e1552011-01-07 17:49:27 +1100756static int proc_sys_compare(const struct dentry *parent,
757 const struct inode *pinode,
758 const struct dentry *dentry, const struct inode *inode,
759 unsigned int len, const char *str, const struct qstr *name)
Al Viro9043476f2008-07-15 08:54:06 -0400760{
Al Virodfef6dcd32011-03-08 01:25:28 -0500761 struct ctl_table_header *head;
Nick Piggin31e6b012011-01-07 17:49:52 +1100762 /* Although proc doesn't have negative dentries, rcu-walk means
763 * that inode here can be NULL */
Al Virodfef6dcd32011-03-08 01:25:28 -0500764 /* AV: can it, indeed? */
Nick Piggin31e6b012011-01-07 17:49:52 +1100765 if (!inode)
Al Virodfef6dcd32011-03-08 01:25:28 -0500766 return 1;
Nick Piggin621e1552011-01-07 17:49:27 +1100767 if (name->len != len)
Al Viro9043476f2008-07-15 08:54:06 -0400768 return 1;
Nick Piggin621e1552011-01-07 17:49:27 +1100769 if (memcmp(name->name, str, len))
Al Viro9043476f2008-07-15 08:54:06 -0400770 return 1;
Al Virodfef6dcd32011-03-08 01:25:28 -0500771 head = rcu_dereference(PROC_I(inode)->sysctl);
772 return !head || !sysctl_is_seen(head);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800773}
774
Al Virod72f71e2009-02-20 05:58:47 +0000775static const struct dentry_operations proc_sys_dentry_operations = {
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800776 .d_revalidate = proc_sys_revalidate,
Al Viro9043476f2008-07-15 08:54:06 -0400777 .d_delete = proc_sys_delete,
778 .d_compare = proc_sys_compare,
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800779};
780
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800781static struct ctl_table *is_branch_in(struct ctl_table *branch,
782 struct ctl_table *table)
783{
784 struct ctl_table *p;
785 const char *s = branch->procname;
786
787 /* branch should have named subdirectory as its first element */
788 if (!s || !branch->child)
789 return NULL;
790
791 /* ... and nothing else */
792 if (branch[1].procname)
793 return NULL;
794
795 /* table should contain subdirectory with the same name */
796 for (p = table; p->procname; p++) {
797 if (!p->child)
798 continue;
799 if (p->procname && strcmp(p->procname, s) == 0)
800 return p;
801 }
802 return NULL;
803}
804
805/* see if attaching q to p would be an improvement */
806static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
807{
808 struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
809 struct ctl_table *next;
810 int is_better = 0;
811 int not_in_parent = !p->attached_by;
812
813 while ((next = is_branch_in(by, to)) != NULL) {
814 if (by == q->attached_by)
815 is_better = 1;
816 if (to == p->attached_by)
817 not_in_parent = 1;
818 by = by->child;
819 to = next->child;
820 }
821
822 if (is_better && not_in_parent) {
823 q->attached_by = by;
824 q->attached_to = to;
825 q->parent = p;
826 }
827}
828
Eric W. Biederman7c60c482012-01-21 13:34:05 -0800829static int sysctl_check_table_dups(const char *path, struct ctl_table *old,
830 struct ctl_table *table)
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800831{
Eric W. Biederman7c60c482012-01-21 13:34:05 -0800832 struct ctl_table *entry, *test;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800833 int error = 0;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800834
Eric W. Biederman7c60c482012-01-21 13:34:05 -0800835 for (entry = old; entry->procname; entry++) {
836 for (test = table; test->procname; test++) {
837 if (strcmp(entry->procname, test->procname) == 0) {
838 printk(KERN_ERR "sysctl duplicate entry: %s/%s\n",
839 path, test->procname);
840 error = -EEXIST;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800841 }
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800842 }
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800843 }
844 return error;
845}
Eric W. Biederman7c60c482012-01-21 13:34:05 -0800846
847static int sysctl_check_dups(struct nsproxy *namespaces,
848 struct ctl_table_header *header,
849 const char *path, struct ctl_table *table)
850{
851 struct ctl_table_root *root;
852 struct ctl_table_set *set;
853 struct ctl_table_header *dir_head, *head;
854 struct ctl_table *dir_table;
855 int error = 0;
856
857 /* No dups if we are the only member of our directory */
858 if (header->attached_by != table)
859 return 0;
860
861 dir_head = header->parent;
862 dir_table = header->attached_to;
863
864 error = sysctl_check_table_dups(path, dir_table, table);
865
866 root = &sysctl_table_root;
867 do {
868 set = lookup_header_set(root, namespaces);
869
870 list_for_each_entry(head, &set->list, ctl_entry) {
871 if (head->unregistering)
872 continue;
873 if (head->attached_to != dir_table)
874 continue;
875 error = sysctl_check_table_dups(path, head->attached_by,
876 table);
877 }
878 root = list_entry(root->root_list.next,
879 struct ctl_table_root, root_list);
880 } while (root != &sysctl_table_root);
881 return error;
882}
883
884static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
885{
886 struct va_format vaf;
887 va_list args;
888
889 va_start(args, fmt);
890 vaf.fmt = fmt;
891 vaf.va = &args;
892
893 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n",
894 path, table->procname, &vaf);
895
896 va_end(args);
897 return -EINVAL;
898}
899
900static int sysctl_check_table(const char *path, struct ctl_table *table)
901{
902 int err = 0;
903 for (; table->procname; table++) {
904 if (table->child)
905 err = sysctl_err(path, table, "Not a file");
906
907 if ((table->proc_handler == proc_dostring) ||
908 (table->proc_handler == proc_dointvec) ||
909 (table->proc_handler == proc_dointvec_minmax) ||
910 (table->proc_handler == proc_dointvec_jiffies) ||
911 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
912 (table->proc_handler == proc_dointvec_ms_jiffies) ||
913 (table->proc_handler == proc_doulongvec_minmax) ||
914 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
915 if (!table->data)
916 err = sysctl_err(path, table, "No data");
917 if (!table->maxlen)
918 err = sysctl_err(path, table, "No maxlen");
919 }
920 if (!table->proc_handler)
921 err = sysctl_err(path, table, "No proc_handler");
922
923 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
924 err = sysctl_err(path, table, "bogus .mode 0%o",
925 table->mode);
926 }
927 return err;
928}
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800929
930/**
Eric W. Biedermanf7280192012-01-22 18:22:05 -0800931 * __register_sysctl_table - register a leaf sysctl table
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800932 * @root: List of sysctl headers to register on
933 * @namespaces: Data to compute which lists of sysctl entries are visible
934 * @path: The path to the directory the sysctl table is in.
935 * @table: the top-level table structure
936 *
937 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
938 * array. A completely 0 filled entry terminates the table.
939 *
940 * The members of the &struct ctl_table structure are used as follows:
941 *
942 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
943 * enter a sysctl file
944 *
945 * data - a pointer to data for use by proc_handler
946 *
947 * maxlen - the maximum size in bytes of the data
948 *
Eric W. Biedermanf7280192012-01-22 18:22:05 -0800949 * mode - the file permissions for the /proc/sys file
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800950 *
Eric W. Biedermanf7280192012-01-22 18:22:05 -0800951 * child - must be %NULL.
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800952 *
953 * proc_handler - the text handler routine (described below)
954 *
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800955 * extra1, extra2 - extra pointers usable by the proc handler routines
956 *
957 * Leaf nodes in the sysctl tree will be represented by a single file
958 * under /proc; non-leaf nodes will be represented by directories.
959 *
Eric W. Biedermanf7280192012-01-22 18:22:05 -0800960 * There must be a proc_handler routine for any terminal nodes.
961 * Several default handlers are available to cover common cases -
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800962 *
963 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
964 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
965 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
966 *
967 * It is the handler's job to read the input buffer from user memory
968 * and process it. The handler should return 0 on success.
969 *
970 * This routine returns %NULL on a failure to register, and a pointer
971 * to the table header on success.
972 */
Eric W. Biederman6e9d5162012-01-21 10:26:26 -0800973struct ctl_table_header *__register_sysctl_table(
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800974 struct ctl_table_root *root,
975 struct nsproxy *namespaces,
Eric W. Biederman6e9d5162012-01-21 10:26:26 -0800976 const char *path, struct ctl_table *table)
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800977{
978 struct ctl_table_header *header;
979 struct ctl_table *new, **prevp;
Eric W. Biederman6e9d5162012-01-21 10:26:26 -0800980 const char *name, *nextname;
981 unsigned int npath = 0;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800982 struct ctl_table_set *set;
Eric W. Biedermanf05e53a2012-01-21 10:03:13 -0800983 size_t path_bytes = 0;
984 char *new_name;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -0800985
986 /* Count the path components */
Eric W. Biederman6e9d5162012-01-21 10:26:26 -0800987 for (name = path; name; name = nextname) {
988 int namelen;
989 nextname = strchr(name, '/');
990 if (nextname) {
991 namelen = nextname - name;
992 nextname++;
993 } else {
994 namelen = strlen(name);
995 }
996 if (namelen == 0)
997 continue;
998 path_bytes += namelen + 1;
999 npath++;
1000 }
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001001
1002 /*
1003 * For each path component, allocate a 2-element ctl_table array.
1004 * The first array element will be filled with the sysctl entry
1005 * for this, the second will be the sentinel (procname == 0).
1006 *
1007 * We allocate everything in one go so that we don't have to
1008 * worry about freeing additional memory in unregister_sysctl_table.
1009 */
Eric W. Biedermanf05e53a2012-01-21 10:03:13 -08001010 header = kzalloc(sizeof(struct ctl_table_header) + path_bytes +
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001011 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1012 if (!header)
1013 return NULL;
1014
1015 new = (struct ctl_table *) (header + 1);
Eric W. Biedermanf05e53a2012-01-21 10:03:13 -08001016 new_name = (char *)(new + (2 * npath));
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001017
1018 /* Now connect the dots */
1019 prevp = &header->ctl_table;
Eric W. Biederman6e9d5162012-01-21 10:26:26 -08001020 for (name = path; name; name = nextname) {
1021 int namelen;
1022 nextname = strchr(name, '/');
1023 if (nextname) {
1024 namelen = nextname - name;
1025 nextname++;
1026 } else {
1027 namelen = strlen(name);
1028 }
1029 if (namelen == 0)
1030 continue;
1031 memcpy(new_name, name, namelen);
1032 new_name[namelen] = '\0';
1033
Eric W. Biedermanf05e53a2012-01-21 10:03:13 -08001034 new->procname = new_name;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001035 new->mode = 0555;
1036
1037 *prevp = new;
1038 prevp = &new->child;
1039
1040 new += 2;
Eric W. Biederman6e9d5162012-01-21 10:26:26 -08001041 new_name += namelen + 1;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001042 }
1043 *prevp = table;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001044
Eric W. Biedermane0d04522012-01-09 22:36:41 -08001045 init_header(header, root, NULL, table);
Eric W. Biederman7c60c482012-01-21 13:34:05 -08001046 if (sysctl_check_table(path, table))
1047 goto fail;
Eric W. Biederman8d6ecfc2012-01-06 11:55:30 -08001048
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001049 spin_lock(&sysctl_lock);
1050 header->set = lookup_header_set(root, namespaces);
1051 header->attached_by = header->ctl_table;
Eric W. Biedermana1945582012-01-21 17:51:48 -08001052 header->attached_to = &root_table[1];
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001053 header->parent = &root_table_header;
Eric W. Biedermanbd295b52012-01-22 21:10:21 -08001054 set = header->set;
1055 root = header->root;
1056 for (;;) {
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001057 struct ctl_table_header *p;
1058 list_for_each_entry(p, &set->list, ctl_entry) {
1059 if (p->unregistering)
1060 continue;
1061 try_attach(p, header);
1062 }
Eric W. Biedermanbd295b52012-01-22 21:10:21 -08001063 if (root == &sysctl_table_root)
1064 break;
1065 root = list_entry(root->root_list.prev,
1066 struct ctl_table_root, root_list);
1067 set = lookup_header_set(root, namespaces);
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001068 }
Eric W. Biederman7c60c482012-01-21 13:34:05 -08001069 if (sysctl_check_dups(namespaces, header, path, table))
1070 goto fail_locked;
Eric W. Biederman8425d6a2012-01-09 17:35:01 -08001071 insert_header(header);
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001072 spin_unlock(&sysctl_lock);
1073
1074 return header;
Eric W. Biederman7c60c482012-01-21 13:34:05 -08001075fail_locked:
1076 spin_unlock(&sysctl_lock);
1077fail:
1078 kfree(header);
1079 dump_stack();
1080 return NULL;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001081}
1082
/*
 * Append "@name/" to the path being built in a PATH_MAX sized buffer.
 *
 * @path: start of the buffer
 * @pos:  current end of the path inside the buffer (where @name goes)
 * @name: component to append
 *
 * On success the buffer is NUL terminated and a pointer to the new end
 * (the terminating NUL) is returned.  Returns NULL, leaving the buffer
 * untouched, when the component does not fit.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
	int namelen = strlen(name);
	char *end;

	/* Room is needed for the name, the '/' and the NUL. */
	if ((pos - path) + namelen + 2 >= PATH_MAX)
		return NULL;

	memcpy(pos, name, namelen);
	end = pos + namelen;
	*end++ = '/';
	*end = '\0';
	return end;
}
1095
Eric W. Biedermanf7280192012-01-22 18:22:05 -08001096static int count_subheaders(struct ctl_table *table)
1097{
1098 int has_files = 0;
1099 int nr_subheaders = 0;
1100 struct ctl_table *entry;
1101
1102 /* special case: no directory and empty directory */
1103 if (!table || !table->procname)
1104 return 1;
1105
1106 for (entry = table; entry->procname; entry++) {
1107 if (entry->child)
1108 nr_subheaders += count_subheaders(entry->child);
1109 else
1110 has_files = 1;
1111 }
1112 return nr_subheaders + has_files;
1113}
1114
1115static int register_leaf_sysctl_tables(const char *path, char *pos,
1116 struct ctl_table_header ***subheader,
1117 struct ctl_table_root *root, struct nsproxy *namespaces,
1118 struct ctl_table *table)
1119{
1120 struct ctl_table *ctl_table_arg = NULL;
1121 struct ctl_table *entry, *files;
1122 int nr_files = 0;
1123 int nr_dirs = 0;
1124 int err = -ENOMEM;
1125
1126 for (entry = table; entry->procname; entry++) {
1127 if (entry->child)
1128 nr_dirs++;
1129 else
1130 nr_files++;
1131 }
1132
1133 files = table;
1134 /* If there are mixed files and directories we need a new table */
1135 if (nr_dirs && nr_files) {
1136 struct ctl_table *new;
1137 files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
1138 GFP_KERNEL);
1139 if (!files)
1140 goto out;
1141
1142 ctl_table_arg = files;
1143 for (new = files, entry = table; entry->procname; entry++) {
1144 if (entry->child)
1145 continue;
1146 *new = *entry;
1147 new++;
1148 }
1149 }
1150
1151 /* Register everything except a directory full of subdirectories */
1152 if (nr_files || !nr_dirs) {
1153 struct ctl_table_header *header;
1154 header = __register_sysctl_table(root, namespaces, path, files);
1155 if (!header) {
1156 kfree(ctl_table_arg);
1157 goto out;
1158 }
1159
1160 /* Remember if we need to free the file table */
1161 header->ctl_table_arg = ctl_table_arg;
1162 **subheader = header;
1163 (*subheader)++;
1164 }
1165
1166 /* Recurse into the subdirectories. */
1167 for (entry = table; entry->procname; entry++) {
1168 char *child_pos;
1169
1170 if (!entry->child)
1171 continue;
1172
1173 err = -ENAMETOOLONG;
1174 child_pos = append_path(path, pos, entry->procname);
1175 if (!child_pos)
1176 goto out;
1177
1178 err = register_leaf_sysctl_tables(path, child_pos, subheader,
1179 root, namespaces, entry->child);
1180 pos[0] = '\0';
1181 if (err)
1182 goto out;
1183 }
1184 err = 0;
1185out:
1186 /* On failure our caller will unregister all registered subheaders */
1187 return err;
1188}
1189
Eric W. Biederman6e9d5162012-01-21 10:26:26 -08001190/**
1191 * __register_sysctl_paths - register a sysctl table hierarchy
1192 * @root: List of sysctl headers to register on
1193 * @namespaces: Data to compute which lists of sysctl entries are visible
1194 * @path: The path to the directory the sysctl table is in.
1195 * @table: the top-level table structure
1196 *
1197 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1198 * array. A completely 0 filled entry terminates the table.
1199 *
1200 * See __register_sysctl_table for more details.
1201 */
1202struct ctl_table_header *__register_sysctl_paths(
1203 struct ctl_table_root *root,
1204 struct nsproxy *namespaces,
1205 const struct ctl_path *path, struct ctl_table *table)
1206{
Eric W. Biedermanec6a5262012-01-21 12:35:23 -08001207 struct ctl_table *ctl_table_arg = table;
Eric W. Biedermanf7280192012-01-22 18:22:05 -08001208 int nr_subheaders = count_subheaders(table);
1209 struct ctl_table_header *header = NULL, **subheaders, **subheader;
Eric W. Biederman6e9d5162012-01-21 10:26:26 -08001210 const struct ctl_path *component;
1211 char *new_path, *pos;
1212
1213 pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1214 if (!new_path)
1215 return NULL;
1216
1217 pos[0] = '\0';
1218 for (component = path; component->procname; component++) {
1219 pos = append_path(new_path, pos, component->procname);
1220 if (!pos)
1221 goto out;
1222 }
Eric W. Biedermanec6a5262012-01-21 12:35:23 -08001223 while (table->procname && table->child && !table[1].procname) {
1224 pos = append_path(new_path, pos, table->procname);
1225 if (!pos)
1226 goto out;
1227 table = table->child;
1228 }
Eric W. Biedermanf7280192012-01-22 18:22:05 -08001229 if (nr_subheaders == 1) {
1230 header = __register_sysctl_table(root, namespaces, new_path, table);
1231 if (header)
1232 header->ctl_table_arg = ctl_table_arg;
1233 } else {
1234 header = kzalloc(sizeof(*header) +
1235 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1236 if (!header)
1237 goto out;
1238
1239 subheaders = (struct ctl_table_header **) (header + 1);
1240 subheader = subheaders;
Eric W. Biedermanec6a5262012-01-21 12:35:23 -08001241 header->ctl_table_arg = ctl_table_arg;
Eric W. Biedermanf7280192012-01-22 18:22:05 -08001242
1243 if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1244 root, namespaces, table))
1245 goto err_register_leaves;
1246 }
1247
Eric W. Biederman6e9d5162012-01-21 10:26:26 -08001248out:
1249 kfree(new_path);
1250 return header;
Eric W. Biedermanf7280192012-01-22 18:22:05 -08001251
1252err_register_leaves:
1253 while (subheader > subheaders) {
1254 struct ctl_table_header *subh = *(--subheader);
1255 struct ctl_table *table = subh->ctl_table_arg;
1256 unregister_sysctl_table(subh);
1257 kfree(table);
1258 }
1259 kfree(header);
1260 header = NULL;
1261 goto out;
Eric W. Biederman6e9d5162012-01-21 10:26:26 -08001262}
1263
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001264/**
1265 * register_sysctl_table_path - register a sysctl table hierarchy
1266 * @path: The path to the directory the sysctl table is in.
1267 * @table: the top-level table structure
1268 *
1269 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1270 * array. A completely 0 filled entry terminates the table.
1271 *
1272 * See __register_sysctl_paths for more details.
1273 */
1274struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1275 struct ctl_table *table)
1276{
1277 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1278 path, table);
1279}
1280EXPORT_SYMBOL(register_sysctl_paths);
1281
1282/**
1283 * register_sysctl_table - register a sysctl table hierarchy
1284 * @table: the top-level table structure
1285 *
1286 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1287 * array. A completely 0 filled entry terminates the table.
1288 *
1289 * See register_sysctl_paths for more details.
1290 */
1291struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1292{
1293 static const struct ctl_path null_path[] = { {} };
1294
1295 return register_sysctl_paths(null_path, table);
1296}
1297EXPORT_SYMBOL(register_sysctl_table);
1298
Eric W. Biederman938aaa42012-01-09 17:24:30 -08001299static void drop_sysctl_table(struct ctl_table_header *header)
1300{
1301 if (--header->nreg)
1302 return;
1303
1304 start_unregistering(header);
1305 if (!--header->parent->count) {
1306 WARN_ON(1);
1307 kfree_rcu(header->parent, rcu);
1308 }
1309 if (!--header->count)
1310 kfree_rcu(header, rcu);
1311}
1312
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001313/**
1314 * unregister_sysctl_table - unregister a sysctl table hierarchy
1315 * @header: the header returned from register_sysctl_table
1316 *
1317 * Unregisters the sysctl table and all children. proc entries may not
1318 * actually be removed until they are no longer used by anyone.
1319 */
1320void unregister_sysctl_table(struct ctl_table_header * header)
1321{
Eric W. Biedermanf7280192012-01-22 18:22:05 -08001322 int nr_subheaders;
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001323 might_sleep();
1324
1325 if (header == NULL)
1326 return;
1327
Eric W. Biedermanf7280192012-01-22 18:22:05 -08001328 nr_subheaders = count_subheaders(header->ctl_table_arg);
1329 if (unlikely(nr_subheaders > 1)) {
1330 struct ctl_table_header **subheaders;
1331 int i;
1332
1333 subheaders = (struct ctl_table_header **)(header + 1);
1334 for (i = nr_subheaders -1; i >= 0; i--) {
1335 struct ctl_table_header *subh = subheaders[i];
1336 struct ctl_table *table = subh->ctl_table_arg;
1337 unregister_sysctl_table(subh);
1338 kfree(table);
1339 }
1340 kfree(header);
1341 return;
1342 }
1343
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001344 spin_lock(&sysctl_lock);
Eric W. Biederman938aaa42012-01-09 17:24:30 -08001345 drop_sysctl_table(header);
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001346 spin_unlock(&sysctl_lock);
1347}
1348EXPORT_SYMBOL(unregister_sysctl_table);
1349
1350void setup_sysctl_set(struct ctl_table_set *p,
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001351 int (*is_seen)(struct ctl_table_set *))
1352{
1353 INIT_LIST_HEAD(&p->list);
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001354 p->is_seen = is_seen;
1355}
1356
Eric W. Biederman97324cd82012-01-09 22:19:13 -08001357void retire_sysctl_set(struct ctl_table_set *set)
1358{
1359 WARN_ON(!list_empty(&set->list));
1360}
Eric W. Biederman1f87f0b2012-01-06 04:07:15 -08001361
Alexey Dobriyan1e0edd32008-10-17 05:07:44 +04001362int __init proc_sys_init(void)
Eric W. Biederman77b14db2007-02-14 00:34:12 -08001363{
Alexey Dobriyane1675232008-10-03 00:23:32 +04001364 struct proc_dir_entry *proc_sys_root;
1365
Eric W. Biederman77b14db2007-02-14 00:34:12 -08001366 proc_sys_root = proc_mkdir("sys", NULL);
Al Viro9043476f2008-07-15 08:54:06 -04001367 proc_sys_root->proc_iops = &proc_sys_dir_operations;
1368 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
Eric W. Biederman77b14db2007-02-14 00:34:12 -08001369 proc_sys_root->nlink = 0;
Eric W. Biedermande4e83bd2012-01-06 03:34:20 -08001370
1371 return sysctl_init();
Eric W. Biederman77b14db2007-02-14 00:34:12 -08001372}