author     Nicolas Pitre <nicolas.pitre@linaro.org>   2017-05-29 15:45:45 -0400
committer  Nicolas Pitre <nicolas.pitre@linaro.org>   2017-10-06 15:33:38 -0400
commit     15a6a150761495055ddf82f5177765e677b8ec37 (patch)
tree       74eb096fda1db25dd4d47ae348c4fb68023e0ebe
parent     08421be0d951e620c916998260d9e6b6bfd97ae4 (diff)
download   linux-optional_sched_classes.tar.gz

sched/rt: make it configurable   (optional_sched_classes)
On most small systems where user space is tightly controlled, the realtime
scheduling class can often be dispensed with to reduce the kernel footprint.
Let's make it configurable.

Before (with CONFIG_SCHED_DL=n):

$ size -t kernel/sched/built-in.o
   text    data     bss     dec     hex filename
[...]
  18336    3388      92   21816    5538 (TOTALS)

With CONFIG_SCHED_RT=n:

$ size -t kernel/sched/built-in.o
   text    data     bss     dec     hex filename
[...]
  15272    3356      32   18660    48e4 (TOTALS)

Signed-off-by: Nicolas Pitre <nico@linaro.org>
-rw-r--r--  include/linux/init_task.h       | 15
-rw-r--r--  include/linux/rtmutex.h         |  2
-rw-r--r--  include/linux/sched.h           |  2
-rw-r--r--  include/linux/sched/rt.h        |  8
-rw-r--r--  init/Kconfig                    | 11
-rw-r--r--  kernel/sched/Makefile           |  3
-rw-r--r--  kernel/sched/core.c             | 31
-rw-r--r--  kernel/sched/deadline.c         |  4
-rw-r--r--  kernel/sched/debug.c            |  2
-rw-r--r--  kernel/sched/sched.h            | 33
-rw-r--r--  kernel/sched/stop_task.c        |  4
-rw-r--r--  kernel/sysctl.c                 |  2
-rw-r--r--  kernel/time/posix-cpu-timers.c  | 10
13 files changed, 100 insertions, 27 deletions
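
The change rests on two techniques visible throughout the diff below: predicates
such as rt_prio() and rt_policy() gain an IS_ENABLED(CONFIG_SCHED_RT) guard so
that RT-only paths compile away, and the RT-specific APIs are replaced by trivial
stubs when the class is not built. A minimal userspace sketch of the first idea
(SCHED_RT_ENABLED and rt_prio_demo are illustrative names, not part of the patch):

#include <stdio.h>

/*
 * Stand-in for the kernel's IS_ENABLED(): evaluates to 1 when the option
 * is defined at build time and 0 otherwise (the real kernel macro needs
 * no helper definition like this).
 */
#ifdef CONFIG_SCHED_RT
#define SCHED_RT_ENABLED 1
#else
#define SCHED_RT_ENABLED 0
#endif

#define MAX_RT_PRIO 100

/*
 * Mirrors the shape of rt_prio()/rt_policy() after the patch: with the
 * option off the expression is a compile-time 0, so every branch that
 * tests it becomes dead code the compiler can drop.
 */
static int rt_prio_demo(int prio)
{
	return SCHED_RT_ENABLED && prio < MAX_RT_PRIO;
}

int main(void)
{
	printf("prio 10 treated as RT: %d\n", rt_prio_demo(10));
	return 0;
}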
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3c07ace..74a9512 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -218,6 +218,16 @@ extern struct cred init_cred;
#define INIT_TASK_SECURITY
#endif
+#ifdef CONFIG_SCHED_RT
+#define INIT_TASK_RT(tsk) \
+ .rt = { \
+ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
+ .time_slice = RR_TIMESLICE, \
+ },
+#else
+#define INIT_TASK_RT(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -243,10 +253,7 @@ extern struct cred init_cred;
.se = { \
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
}, \
- .rt = { \
- .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
- .time_slice = RR_TIMESLICE, \
- }, \
+ INIT_TASK_RT(tsk) \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
INIT_CGROUP_SCHED(tsk) \
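
The hunks above introduce a config-gated initializer macro. A self-contained
sketch of the same pattern, using hypothetical demo types rather than the real
task_struct:

/* Build with -DCONFIG_SCHED_RT to include the .rt initializer. */
#include <stdio.h>

struct rt_entity_demo { int time_slice; };

struct task_demo {
	int pid;
#ifdef CONFIG_SCHED_RT
	struct rt_entity_demo rt;
#endif
	int state;
};

#ifdef CONFIG_SCHED_RT
#define INIT_TASK_RT_DEMO	.rt = { .time_slice = 100 },
#else
#define INIT_TASK_RT_DEMO	/* expands to nothing */
#endif

/*
 * The trailing comma lives inside the macro, so the initializer below
 * stays valid whether or not the RT member exists.
 */
static struct task_demo init_task_demo = {
	.pid = 0,
	INIT_TASK_RT_DEMO
	.state = 0,
};

int main(void)
{
	printf("pid=%d state=%d\n", init_task_demo.pid, init_task_demo.state);
	return 0;
}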
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 2560dd5..40622e3 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -12,8 +12,6 @@
#ifndef __LINUX_RT_MUTEX_H
#define __LINUX_RT_MUTEX_H
-#define CONFIG_SCHED_RT /* temporary until kconfig defines it */
-
#if defined(CONFIG_SCHED_RT) || defined(NO_RT_MUTEX_FALLBACK)
#include <linux/linkage.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 00b4bed..7172d74 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -561,7 +561,9 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
+#ifdef CONFIG_SCHED_RT
struct sched_rt_entity rt;
+#endif
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index f93329a..2a6c2b0 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -7,7 +7,7 @@ struct task_struct;
static inline int rt_prio(int prio)
{
- if (unlikely(prio < MAX_RT_PRIO))
+ if (IS_ENABLED(CONFIG_SCHED_RT) && unlikely(prio < MAX_RT_PRIO))
return 1;
return 0;
}
@@ -52,4 +52,10 @@ extern void normalize_rt_tasks(void);
*/
#define RR_TIMESLICE (100 * HZ / 1000)
+#ifdef CONFIG_SCHED_RT
+#define rt_timeout(tsk) (tsk)->rt.timeout
+#else
+#define rt_timeout(tsk) 0
+#endif
+
#endif /* _LINUX_SCHED_RT_H */
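
rt_timeout() above is the expression-stub counterpart: with the RT class
compiled out the accessor degrades to a constant 0, so existing comparisons
(see the posix-cpu-timers hunks at the end of this patch) build unchanged and
fold away. A sketch under illustrative types, not the real task_struct:

#include <stdio.h>

struct rt_entity_demo { unsigned long timeout; };

struct task_demo {
	int pid;
#ifdef CONFIG_SCHED_RT
	struct rt_entity_demo rt;
#endif
};

#ifdef CONFIG_SCHED_RT
#define rt_timeout_demo(t)	((t)->rt.timeout)
#else
#define rt_timeout_demo(t)	0UL
#endif

/* With CONFIG_SCHED_RT unset, this whole function reduces to "return 0". */
static int rttime_over_soft_limit(const struct task_demo *t, unsigned long soft)
{
	return rt_timeout_demo(t) > soft;
}

int main(void)
{
	struct task_demo t = { .pid = 1 };

	printf("over soft limit: %d\n", rttime_over_soft_limit(&t, 5));
	return 0;
}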
diff --git a/init/Kconfig b/init/Kconfig
index f252e0d..a553205 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -743,7 +743,7 @@ config CFS_BANDWIDTH
config RT_GROUP_SCHED
bool "Group scheduling for SCHED_RR/FIFO"
- depends on CGROUP_SCHED
+ depends on CGROUP_SCHED && SCHED_RT
default n
help
This feature lets you explicitly allocate real CPU bandwidth
@@ -959,6 +959,14 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
+config SCHED_RT
+ bool "Real Time Task Scheduling" if EXPERT
+ default y
+ help
+	  This adds the sched_rt scheduling class to the kernel, providing
+	  support for the SCHED_FIFO and SCHED_RR policies. You might want
+	  to disable this to reduce the kernel size. If unsure, say Y.
+
config SCHED_DL
bool "Deadline Task Scheduling" if EXPERT
default y
@@ -1673,6 +1681,7 @@ config SLABINFO
config RT_MUTEXES
bool
+ depends on SCHED_RT
config BASE_SMALL
int
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 0d3baba..62fa935 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -16,8 +16,9 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += idle_task.o fair.o rt.o
+obj-y += idle_task.o fair.o
obj-y += wait.o wait_bit.o swait.o completion.o idle.o
+obj-$(CONFIG_SCHED_RT) += rt.o
obj-$(CONFIG_SCHED_DL) += deadline.o $(if $(CONFIG_SMP),cpudeadline.o)
obj-$(CONFIG_SMP) += cpupri.o topology.o stop_task.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8dc1fd7..6193e08 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -643,8 +643,8 @@ bool sched_can_stop_tick(struct rq *rq)
* If there are more than one RR tasks, we need the tick to effect the
* actual RR behaviour.
*/
- if (rq->rt.rr_nr_running) {
- if (rq->rt.rr_nr_running == 1)
+ if (rt_rr_nr_running(rq)) {
+ if (rt_rr_nr_running(rq) == 1)
return true;
else
return false;
@@ -654,7 +654,7 @@ bool sched_can_stop_tick(struct rq *rq)
* If there's no RR tasks, but FIFO tasks, we can skip the tick, no
* forced preemption between FIFO tasks.
*/
- fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+ fifo_nr_running = rt_rt_nr_running(rq) - rt_rr_nr_running(rq);
if (fifo_nr_running)
return true;
@@ -1594,7 +1594,7 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
* Reset it back to a normal scheduling class so that
* it can die in pieces.
*/
- old_stop->sched_class = &rt_sched_class;
+ old_stop->sched_class = stop_sched_class.next;
}
}
@@ -2182,11 +2182,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
__dl_clear_params(p);
#endif
+#ifdef CONFIG_SCHED_RT
INIT_LIST_HEAD(&p->rt.run_list);
p->rt.timeout = 0;
p->rt.time_slice = sched_rr_timeslice;
p->rt.on_rq = 0;
p->rt.on_list = 0;
+#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -4021,6 +4023,23 @@ static int __sched_setscheduler(struct task_struct *p,
/* The pi code expects interrupts enabled */
BUG_ON(pi && in_interrupt());
+
+ /*
+ * When the RT scheduling class is disabled, let's make sure kernel threads
+ * wanting RT still get lowest nice value to give them highest available
+ * priority rather than simply returning an error. Obviously we can't use
+ * rt_policy() here as it is always false in that case.
+ */
+ if (!IS_ENABLED(CONFIG_SCHED_RT) && !user &&
+ (policy == SCHED_FIFO || policy == SCHED_RR)) {
+ static const struct sched_attr k_attr = {
+ .sched_policy = SCHED_NORMAL,
+ .sched_nice = MIN_NICE,
+ };
+ attr = &k_attr;
+ policy = SCHED_NORMAL;
+ }
+
recheck:
/* Double check policy once rq lock held: */
if (policy < 0) {
@@ -5848,7 +5867,10 @@ void __init sched_init(void)
rq->calc_load_active = 0;
rq->calc_load_update = jiffies + LOAD_FREQ;
init_cfs_rq(&rq->cfs);
+#ifdef CONFIG_SCHED_RT
init_rt_rq(&rq->rt);
+ rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
+#endif
init_dl_rq(&rq->dl);
#ifdef CONFIG_FAIR_GROUP_SCHED
root_task_group.shares = ROOT_TASK_GROUP_LOAD;
@@ -5877,7 +5899,6 @@ void __init sched_init(void)
init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */
- rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0191ec7..713ca97 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2312,7 +2312,11 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
}
const struct sched_class dl_sched_class = {
+#ifdef CONFIG_SCHED_RT
.next = &rt_sched_class,
+#else
+ .next = &fair_sched_class,
+#endif
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
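
The .next adjustments here, and in stop_task.c and sched_class_highest below,
keep the scheduling-class chain intact when rt_sched_class does not exist: the
class above simply links to the class below it. A userspace sketch of that
compile-time linked list (names are illustrative):

#include <stdio.h>

struct class_demo {
	const char *name;
	const struct class_demo *next;
};

static const struct class_demo fair_demo = { "fair", NULL };

#ifdef CONFIG_SCHED_RT
static const struct class_demo rt_demo = { "rt", &fair_demo };
#define DEMO_DL_NEXT (&rt_demo)
#else
#define DEMO_DL_NEXT (&fair_demo)
#endif

static const struct class_demo dl_demo = { "deadline", DEMO_DL_NEXT };

int main(void)
{
	/* Walks deadline -> (rt ->) fair, depending on the configuration. */
	for (const struct class_demo *c = &dl_demo; c; c = c->next)
		printf("%s\n", c->name);
	return 0;
}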
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 775fa98..e977fd1 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -705,7 +705,9 @@ do { \
spin_lock_irqsave(&sched_debug_lock, flags);
print_cfs_stats(m, cpu);
+#ifdef CONFIG_SCHED_RT
print_rt_stats(m, cpu);
+#endif
#ifdef CONFIG_SCHED_DL
print_dl_stats(m, cpu);
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2f40f09..1060b4b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -132,7 +132,8 @@ static inline int fair_policy(int policy)
static inline int rt_policy(int policy)
{
- return policy == SCHED_FIFO || policy == SCHED_RR;
+ return IS_ENABLED(CONFIG_SCHED_RT) &&
+ (policy == SCHED_FIFO || policy == SCHED_RR);
}
static inline int dl_policy(int policy)
@@ -401,8 +402,6 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
-extern void free_rt_sched_group(struct task_group *tg);
-extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
struct sched_rt_entity *rt_se, int cpu,
struct sched_rt_entity *parent);
@@ -520,7 +519,7 @@ struct cfs_rq {
static inline int rt_bandwidth_enabled(void)
{
- return sysctl_sched_rt_runtime >= 0;
+ return IS_ENABLED(CONFIG_SCHED_RT) && sysctl_sched_rt_runtime >= 0;
}
/* RT IPI pull logic requires IRQ_WORK */
@@ -569,6 +568,24 @@ struct rt_rq {
#endif
};
+extern struct rt_bandwidth def_rt_bandwidth;
+
+#ifdef CONFIG_SCHED_RT
+#define rt_rr_nr_running(rq) (rq)->rt.rr_nr_running
+#define rt_rt_nr_running(rq) (rq)->rt.rt_nr_running
+extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
+extern void free_rt_sched_group(struct task_group *tg);
+extern void init_sched_rt_class(void);
+extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+#else
+#define rt_rr_nr_running(rq) 0
+#define rt_rt_nr_running(rq) 0
+#define alloc_rt_sched_group(...) 1
+#define free_rt_sched_group(tg) do { } while (0)
+#define init_sched_rt_class() do { } while (0)
+#define init_rt_bandwidth(...) do { } while (0)
+#endif
+
/* Deadline class' related fields in a runqueue */
struct dl_rq {
/* runqueue is an rbtree, ordered by deadline */
@@ -1499,8 +1516,10 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
#define sched_class_highest (&stop_sched_class)
#elif defined(CONFIG_SCHED_DL)
#define sched_class_highest (&dl_sched_class)
-#else
+#elif defined(CONFIG_SCHED_RT)
#define sched_class_highest (&rt_sched_class)
+#else
+#define sched_class_highest (&fair_sched_class)
#endif
#define for_each_class(class) \
@@ -1553,15 +1572,11 @@ extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);
-extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
-extern struct rt_bandwidth def_rt_bandwidth;
-extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
-
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
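
The #else branch above stubs the statement-like helpers with do { } while (0)
and makes alloc_rt_sched_group() a constant 1 (its success return value), so
callers need no #ifdefs of their own. A short sketch of why the do-while form
is used for no-op statement macros (demo names, not kernel code):

#include <stdio.h>

#define free_rt_demo(tg)	do { } while (0)

int main(void)
{
	int have_group = 0;

	/*
	 * The do { } while (0) form expands to a single statement, so the
	 * stub stays safe even as the body of an un-braced if/else.
	 */
	if (have_group)
		free_rt_demo(NULL);
	else
		printf("no RT group to free\n");

	return 0;
}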
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 5632dc3..7cad8c1 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -112,8 +112,10 @@ static void update_curr_stop(struct rq *rq)
const struct sched_class stop_sched_class = {
#ifdef CONFIG_SCHED_DL
.next = &dl_sched_class,
-#else
+#elif defined(CONFIG_SCHED_RT)
.next = &rt_sched_class,
+#else
+ .next = &fair_sched_class,
#endif
.enqueue_task = enqueue_task_stop,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6648fbb..3f94373 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -422,6 +422,7 @@ static struct ctl_table kern_table[] = {
},
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
+#ifdef CONFIG_SCHED_RT
{
.procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period,
@@ -443,6 +444,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rr_handler,
},
+#endif
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 8585ad6..2183b1b 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -4,6 +4,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/cputime.h>
+#include <linux/sched/rt.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
@@ -822,12 +823,15 @@ static void check_thread_timers(struct task_struct *tsk,
/*
* Check for the special case thread timers.
*/
- soft = task_rlimit(tsk, RLIMIT_RTTIME);
+ if (IS_ENABLED(CONFIG_SCHED_RT))
+ soft = task_rlimit(tsk, RLIMIT_RTTIME);
+ else
+ soft = RLIM_INFINITY;
if (soft != RLIM_INFINITY) {
unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);
if (hard != RLIM_INFINITY &&
- tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
+ rt_timeout(tsk) > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
/*
* At the hard limit, we just die.
* No need to calculate anything else now.
@@ -839,7 +843,7 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
return;
}
- if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
+ if (rt_timeout(tsk) > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
/*
* At the soft limit, send a SIGXCPU every second.
*/