-rw-r--r--  include/linux/init_task.h        15
-rw-r--r--  include/linux/rtmutex.h           2
-rw-r--r--  include/linux/sched.h             2
-rw-r--r--  include/linux/sched/rt.h          8
-rw-r--r--  init/Kconfig                     11
-rw-r--r--  kernel/sched/Makefile             3
-rw-r--r--  kernel/sched/core.c              31
-rw-r--r--  kernel/sched/deadline.c           4
-rw-r--r--  kernel/sched/debug.c              2
-rw-r--r--  kernel/sched/sched.h             33
-rw-r--r--  kernel/sched/stop_task.c          4
-rw-r--r--  kernel/sysctl.c                   2
-rw-r--r--  kernel/time/posix-cpu-timers.c   10
13 files changed, 100 insertions, 27 deletions
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3c07ace5b431..74a9512e06d3 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -218,6 +218,16 @@ extern struct cred init_cred;
#define INIT_TASK_SECURITY
#endif
+#ifdef CONFIG_SCHED_RT
+#define INIT_TASK_RT(tsk) \
+ .rt = { \
+ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
+ .time_slice = RR_TIMESLICE, \
+ },
+#else
+#define INIT_TASK_RT(tsk)
+#endif
+
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -243,10 +253,7 @@ extern struct cred init_cred;
.se = { \
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
}, \
- .rt = { \
- .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
- .time_slice = RR_TIMESLICE, \
- }, \
+ INIT_TASK_RT(tsk) \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
INIT_CGROUP_SCHED(tsk) \
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 2560dd52d487..40622e3b506a 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -12,8 +12,6 @@
#ifndef __LINUX_RT_MUTEX_H
#define __LINUX_RT_MUTEX_H
-#define CONFIG_SCHED_RT /* temporary until kconfig defines it */
-
#if defined(CONFIG_SCHED_RT) || defined(NO_RT_MUTEX_FALLBACK)
#include <linux/linkage.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 00b4bed1706e..7172d74edc4d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -561,7 +561,9 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
+#ifdef CONFIG_SCHED_RT
struct sched_rt_entity rt;
+#endif
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index f93329aba31a..2a6c2b0d8c57 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -7,7 +7,7 @@ struct task_struct;
static inline int rt_prio(int prio)
{
- if (unlikely(prio < MAX_RT_PRIO))
+ if (IS_ENABLED(CONFIG_SCHED_RT) && unlikely(prio < MAX_RT_PRIO))
return 1;
return 0;
}
@@ -52,4 +52,10 @@ extern void normalize_rt_tasks(void);
*/
#define RR_TIMESLICE (100 * HZ / 1000)
+#ifdef CONFIG_SCHED_RT
+#define rt_timeout(tsk) (tsk)->rt.timeout
+#else
+#define rt_timeout(tsk) 0
+#endif
+
#endif /* _LINUX_SCHED_RT_H */
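
Two complementary compile-out techniques appear in this header. rt_prio() gates its test with IS_ENABLED(), so the expression is still parsed and type-checked when CONFIG_SCHED_RT=n but folds to constant false; rt_timeout() hides a field that may not exist behind a macro that degrades to 0. A rough user-space sketch of both, using a simplified stand-in for the kernel's IS_ENABLED() (the real macro, in include/linux/kconfig.h, also copes with undefined options):

#include <stdio.h>

/* Set to 0 to mimic the option being disabled. */
#define CONFIG_FEATURE_X 1

/* Simplified IS_ENABLED(): assumes the option is always defined as 0 or 1. */
#define MY_IS_ENABLED(opt)	(opt)

struct task_like {
#if CONFIG_FEATURE_X
	struct { unsigned long timeout; } feat;
#endif
	int prio;
};

/* Field accessor that degrades to a constant when the field is absent. */
#if CONFIG_FEATURE_X
#define feat_timeout(t)	((t)->feat.timeout)
#else
#define feat_timeout(t)	0UL
#endif

static int is_feat_prio(int prio)
{
	/* Compiled in both configurations, dead-code eliminated in one. */
	if (MY_IS_ENABLED(CONFIG_FEATURE_X) && prio < 100)
		return 1;
	return 0;
}

int main(void)
{
	struct task_like t = { .prio = 50 };

	printf("%d %lu\n", is_feat_prio(t.prio), feat_timeout(&t));
	return 0;
}

The && trick is what lets rt_prio() stay in a shared header instead of being duplicated under #ifdef.
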
diff --git a/init/Kconfig b/init/Kconfig
index f252e0dbee14..a55320528c05 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -743,7 +743,7 @@ config CFS_BANDWIDTH
config RT_GROUP_SCHED
bool "Group scheduling for SCHED_RR/FIFO"
- depends on CGROUP_SCHED
+ depends on CGROUP_SCHED && SCHED_RT
default n
help
This feature lets you explicitly allocate real CPU bandwidth
@@ -959,6 +959,14 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
+config SCHED_RT
+ bool "Real Time Task Scheduling" if EXPERT
+ default y
+ help
+ This adds the sched_rt scheduling class to the kernel providing
+ support for the SCHED_FIFO and SCHED_RR policies. You might want
+ to disable this to reduce the kernel size. If unsure, say Y.
+
config SCHED_DL
bool "Deadline Task Scheduling" if EXPERT
default y
@@ -1673,6 +1681,7 @@ config SLABINFO
config RT_MUTEXES
bool
+ depends on SCHED_RT
config BASE_SMALL
int
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 0d3baba207a6..62fa935a341e 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -16,8 +16,9 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
obj-y += core.o loadavg.o clock.o cputime.o
-obj-y += idle_task.o fair.o rt.o
+obj-y += idle_task.o fair.o
obj-y += wait.o wait_bit.o swait.o completion.o idle.o
+obj-$(CONFIG_SCHED_RT) += rt.o
obj-$(CONFIG_SCHED_DL) += deadline.o $(if $(CONFIG_SMP),cpudeadline.o)
obj-$(CONFIG_SMP) += cpupri.o topology.o stop_task.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
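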
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8dc1fd7bb714..6193e086b994 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -643,8 +643,8 @@ bool sched_can_stop_tick(struct rq *rq)
* If there is more than one RR task, we need the tick to effect the
* actual RR behaviour.
*/
- if (rq->rt.rr_nr_running) {
- if (rq->rt.rr_nr_running == 1)
+ if (rt_rr_nr_running(rq)) {
+ if (rt_rr_nr_running(rq) == 1)
return true;
else
return false;
@@ -654,7 +654,7 @@ bool sched_can_stop_tick(struct rq *rq)
* If there are no RR tasks, but FIFO tasks, we can skip the tick: no
* forced preemption between FIFO tasks.
*/
- fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+ fifo_nr_running = rt_rt_nr_running(rq) - rt_rr_nr_running(rq);
if (fifo_nr_running)
return true;
@@ -1594,7 +1594,7 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
* Reset it back to a normal scheduling class so that
* it can die in pieces.
*/
- old_stop->sched_class = &rt_sched_class;
+ old_stop->sched_class = stop_sched_class.next;
}
}
@@ -2182,11 +2182,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
__dl_clear_params(p);
#endif
+#ifdef CONFIG_SCHED_RT
INIT_LIST_HEAD(&p->rt.run_list);
p->rt.timeout = 0;
p->rt.time_slice = sched_rr_timeslice;
p->rt.on_rq = 0;
p->rt.on_list = 0;
+#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -4021,6 +4023,23 @@ static int __sched_setscheduler(struct task_struct *p,
/* The pi code expects interrupts enabled */
BUG_ON(pi && in_interrupt());
+
+ /*
+ * When the RT scheduling class is disabled, make sure kernel threads
+ * that request an RT policy still get the lowest nice value, and with
+ * it the highest available priority, rather than an error. We can't
+ * use rt_policy() here since it is always false in that case.
+ */
+ if (!IS_ENABLED(CONFIG_SCHED_RT) && !user &&
+ (policy == SCHED_FIFO || policy == SCHED_RR)) {
+ static const struct sched_attr k_attr = {
+ .sched_policy = SCHED_NORMAL,
+ .sched_nice = MIN_NICE,
+ };
+ attr = &k_attr;
+ policy = SCHED_NORMAL;
+ }
+
recheck:
/* Double check policy once rq lock held: */
if (policy < 0) {
@@ -5848,7 +5867,10 @@ void __init sched_init(void)
rq->calc_load_active = 0;
rq->calc_load_update = jiffies + LOAD_FREQ;
init_cfs_rq(&rq->cfs);
+#ifdef CONFIG_SCHED_RT
init_rt_rq(&rq->rt);
+ rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
+#endif
init_dl_rq(&rq->dl);
#ifdef CONFIG_FAIR_GROUP_SCHED
root_task_group.shares = ROOT_TASK_GROUP_LOAD;
@@ -5877,7 +5899,6 @@ void __init sched_init(void)
init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */
- rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif
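
The least obvious hunk in core.c is sched_set_stop_task(): once rt_sched_class may not exist, the class sitting directly below stop varies with the configuration (dl, rt, or fair), so the old stop task is demoted by following the hierarchy's own .next pointer instead of hardcoding &rt_sched_class. A toy sketch of that singly linked priority chain; the names (stop_class and friends) are illustrative, not the kernel structures:

#include <stdio.h>

struct sched_class_like {
	const char *name;
	const struct sched_class_like *next;
};

/* Built lowest-first so each .next can point at an already-defined class. */
static const struct sched_class_like fair_class = { "fair", NULL };
static const struct sched_class_like rt_class   = { "rt",   &fair_class };
static const struct sched_class_like stop_class = { "stop", &rt_class };

int main(void)
{
	/* Demote one level without naming the class below "stop". */
	const struct sched_class_like *demoted = stop_class.next;

	printf("demoted to: %s\n", demoted->name);

	/* Walk the whole chain, as the kernel's for_each_class() does. */
	for (const struct sched_class_like *c = &stop_class; c; c = c->next)
		printf("class: %s\n", c->name);
	return 0;
}

Dropping rt_class from the middle of such a chain only requires fixing up the one .next that pointed at it, which is exactly what the deadline.c and stop_task.c hunks below do.
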
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0191ec7667c3..713ca972616d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2312,7 +2312,11 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
}
const struct sched_class dl_sched_class = {
+#ifdef CONFIG_SCHED_RT
.next = &rt_sched_class,
+#else
+ .next = &fair_sched_class,
+#endif
.enqueue_task = enqueue_task_dl,
.dequeue_task = dequeue_task_dl,
.yield_task = yield_task_dl,
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 775fa98aec29..e977fd1c313e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -705,7 +705,9 @@ do { \
spin_lock_irqsave(&sched_debug_lock, flags);
print_cfs_stats(m, cpu);
+#ifdef CONFIG_SCHED_RT
print_rt_stats(m, cpu);
+#endif
#ifdef CONFIG_SCHED_DL
print_dl_stats(m, cpu);
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2f40f09aaec5..1060b4beb0d0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -132,7 +132,8 @@ static inline int fair_policy(int policy)
static inline int rt_policy(int policy)
{
- return policy == SCHED_FIFO || policy == SCHED_RR;
+ return IS_ENABLED(CONFIG_SCHED_RT) &&
+ (policy == SCHED_FIFO || policy == SCHED_RR);
}
static inline int dl_policy(int policy)
@@ -401,8 +402,6 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
-extern void free_rt_sched_group(struct task_group *tg);
-extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
struct sched_rt_entity *rt_se, int cpu,
struct sched_rt_entity *parent);
@@ -520,7 +519,7 @@ struct cfs_rq {
static inline int rt_bandwidth_enabled(void)
{
- return sysctl_sched_rt_runtime >= 0;
+ return IS_ENABLED(CONFIG_SCHED_RT) && sysctl_sched_rt_runtime >= 0;
}
/* RT IPI pull logic requires IRQ_WORK */
@@ -569,6 +568,24 @@ struct rt_rq {
#endif
};
+extern struct rt_bandwidth def_rt_bandwidth;
+
+#ifdef CONFIG_SCHED_RT
+#define rt_rr_nr_running(rq) (rq)->rt.rr_nr_running
+#define rt_rt_nr_running(rq) (rq)->rt.rt_nr_running
+extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
+extern void free_rt_sched_group(struct task_group *tg);
+extern void init_sched_rt_class(void);
+extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+#else
+#define rt_rr_nr_running(rq) 0
+#define rt_rt_nr_running(rq) 0
+#define alloc_rt_sched_group(...) 1
+#define free_rt_sched_group(tg) do { } while (0)
+#define init_sched_rt_class() do { } while (0)
+#define init_rt_bandwidth(...) do { } while (0)
+#endif
+
/* Deadline class' related fields in a runqueue */
struct dl_rq {
/* runqueue is an rbtree, ordered by deadline */
@@ -1499,8 +1516,10 @@ static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
#define sched_class_highest (&stop_sched_class)
#elif defined(CONFIG_SCHED_DL)
#define sched_class_highest (&dl_sched_class)
-#else
+#elif defined(CONFIG_SCHED_RT)
#define sched_class_highest (&rt_sched_class)
+#else
+#define sched_class_highest (&fair_sched_class)
#endif
#define for_each_class(class) \
@@ -1553,15 +1572,11 @@ extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);
-extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
-extern struct rt_bandwidth def_rt_bandwidth;
-extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
-
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
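
The #else half of the new block in sched.h is the usual stub pattern: every helper the rest of the scheduler calls gets a constant or do-nothing fallback, so call sites like sched_can_stop_tick() and sched_init() build unchanged with CONFIG_SCHED_RT=n. A small sketch of why statement-like stubs are spelled do { } while (0), again with hypothetical names:

#include <stdio.h>

/* Set to 1 to mimic the option being enabled. */
#define CONFIG_FEATURE_X 0

struct state {
#if CONFIG_FEATURE_X
	int x_count;
#endif
	int y_count;
};

#if CONFIG_FEATURE_X
static void init_feature_x(struct state *s) { s->x_count = 1; }
#define feature_x_count(s)	((s)->x_count)
#else
/*
 * do { } while (0) makes the stub behave as a single statement, so a
 * construct like "if (cond) init_feature_x(s); else ..." still parses
 * as intended.
 */
#define init_feature_x(s)	do { } while (0)
#define feature_x_count(s)	0
#endif

int main(void)
{
	struct state s = { .y_count = 3 };

	if (s.y_count)
		init_feature_x(&s);	/* vanishes when the option is off */
	printf("x=%d y=%d\n", feature_x_count(&s), s.y_count);
	return 0;
}
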
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 5632dc3e6331..7cad8c154099 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -112,8 +112,10 @@ static void update_curr_stop(struct rq *rq)
const struct sched_class stop_sched_class = {
#ifdef CONFIG_SCHED_DL
.next = &dl_sched_class,
-#else
+#elif defined(CONFIG_SCHED_RT)
.next = &rt_sched_class,
+#else
+ .next = &fair_sched_class,
#endif
.enqueue_task = enqueue_task_stop,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6648fbbb8157..3f94373348f2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -422,6 +422,7 @@ static struct ctl_table kern_table[] = {
},
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
+#ifdef CONFIG_SCHED_RT
{
.procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period,
@@ -443,6 +444,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rr_handler,
},
+#endif
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 8585ad6e472a..2183b1b5c9c3 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -4,6 +4,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/cputime.h>
+#include <linux/sched/rt.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
@@ -822,12 +823,15 @@ static void check_thread_timers(struct task_struct *tsk,
/*
* Check for the special case thread timers.
*/
- soft = task_rlimit(tsk, RLIMIT_RTTIME);
+ if (IS_ENABLED(CONFIG_SCHED_RT))
+ soft = task_rlimit(tsk, RLIMIT_RTTIME);
+ else
+ soft = RLIM_INFINITY;
if (soft != RLIM_INFINITY) {
unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);
if (hard != RLIM_INFINITY &&
- tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
+ rt_timeout(tsk) > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
/*
* At the hard limit, we just die.
* No need to calculate anything else now.
@@ -839,7 +843,7 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
return;
}
- if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
+ if (rt_timeout(tsk) > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
/*
* At the soft limit, send a SIGXCPU every second.
*/