1. Overview
On March 11, 2022, Vincent Guittot wrote that, building on the latency-nice patch series [1-4] that Parth posted in 2020, he had restarted that work and posted a new set of patches for a latency nice priority. The series addresses whether a task may preempt the currently running task in the CFS scheduler.
2. Design rationale
The latency-nice priority works much like the existing nice value, but latency-nice expresses how much latency a task can tolerate. For example, with latency-nice values in the range [-20, 19], **a task A with latency_nice=-20 needs lower latency to run properly than a task B with latency_nice=+19, so the scheduler should prefer to run task A**.
latency_nice takes the latency requirement from user space and applies it to the CFS sched_class; that is how the control is exercised.
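To make the user-space side concrete before diving into the patches, here is a minimal sketch of setting the value via sched_setattr(). It assumes a kernel with this series applied; since glibc ships neither a wrapper nor the extended structure, struct sched_attr and the flag value are hand-copied from the patched uapi headers (treat the exact 0x80 flag value as an assumption taken from this series):

```c
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Hand-copied from the patched uapi headers (not available in glibc). */
#define SCHED_FLAG_LATENCY_NICE 0x80

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;		/* SCHED_NORMAL/SCHED_BATCH */
	uint32_t sched_priority;	/* SCHED_FIFO/SCHED_RR */
	uint64_t sched_runtime;		/* SCHED_DEADLINE */
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;	/* utilization clamps (VER1) */
	uint32_t sched_util_max;
	int32_t  sched_latency_nice;	/* latency hint added by this series (VER2) */
};

int set_latency_nice(pid_t pid, int latency_nice)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_flags = SCHED_FLAG_LATENCY_NICE;
	attr.sched_latency_nice = latency_nice;

	return syscall(SYS_sched_setattr, pid, &attr, 0);
}

int main(void)
{
	/* Ask for the lowest latency for the calling task; lowering the
	 * value needs CAP_SYS_NICE once patch 4 of the series is applied. */
	if (set_latency_nice(0, -20))
		perror("sched_setattr");
	return 0;
}
```

A real caller would likely also set SCHED_FLAG_KEEP_POLICY | SCHED_FLAG_KEEP_PARAMS so that only the latency hint changes; as written, the sketch (re)sets the task to plain SCHED_NORMAL.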
3. Implementation
[PATCH 1/6]
Main changes:
- Add an int latency_nice member to struct task_struct;
- Show the task's latency_nice in /proc/<pid>/sched (see the sketch below);
- Define the latency_nice value range as [-20, 19];
- Define DEFAULT_LATENCY_NICE as 0.
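With this patch applied, the new field can be inspected directly from user space. A small sketch that scans /proc/<pid>/sched for the new line (the exact column layout follows the P() macro in debug.c):

```c
#include <stdio.h>
#include <string.h>

/* Print the latency_nice line of /proc/<pid>/sched, if the kernel has it. */
void show_latency_nice(int pid)
{
	char path[64], line[256];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/sched", pid);
	f = fopen(path, "r");
	if (!f)
		return;

	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "latency_nice"))
			fputs(line, stdout);	/* e.g. "latency_nice ... : 0" */
	}
	fclose(f);
}
```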
Signed-off-by: Parth Shah <parth@linux.ibm.com>
[rebase]
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
include/linux/sched.h | 1 +
kernel/sched/debug.c | 1 +
kernel/sched/sched.h | 18 ++++++++++++++++++
3 files changed, 20 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 508b91d57470..2aa889a59054 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -779,6 +779,7 @@ struct task_struct {
int static_prio;
int normal_prio;
unsigned int rt_priority;
+ int latency_nice;
struct sched_entity se;
struct sched_rt_entity rt;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 102d6f70e84d..5d76a8927888 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1043,6 +1043,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
#endif
P(policy);
P(prio);
+ P(latency_nice);
if (task_has_dl_policy(p)) {
P(dl.runtime);
P(dl.deadline);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9b33ba9c3c42..456ad2159eb1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -105,6 +105,24 @@ extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
+
+#define MAX_LATENCY_NICE 19
+#define MIN_LATENCY_NICE -20
+
+#define LATENCY_NICE_WIDTH \
+ (MAX_LATENCY_NICE - MIN_LATENCY_NICE + 1)
+
+/*
+ * Default tasks should be treated as a task with latency_nice = 0.
+ */
+#define DEFAULT_LATENCY_NICE 0
+
[PATCH 2/6]
Main changes:
- Initialize latency_nice = 0 for task 0 (the idle/swapper task);
- A child task inherits its parent's latency_nice value;
- If sched_reset_on_fork is set, the child's latency_nice is reset to 0 (both fork-time behaviors are illustrated in the sketch below).
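A quick way to observe the inheritance rule from user space, as a sketch (set_latency_nice() is the hypothetical helper from the sketch in section 2; SCHED_RESET_ON_FORK is the long-standing mechanism that triggers the reset path):

```c
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int set_latency_nice(pid_t pid, int latency_nice);	/* sketch from section 2 */

int main(void)
{
	set_latency_nice(0, -10);	/* parent asks for low latency */

	if (fork() == 0) {
		/*
		 * The child starts with latency_nice == -10: sched_fork()
		 * copied it from the parent. Had the parent been set up with
		 * SCHED_RESET_ON_FORK, sched_fork() would instead reset the
		 * child to DEFAULT_LATENCY_NICE (0).
		 */
		_exit(EXIT_SUCCESS);
	}
	wait(NULL);
	return 0;
}
```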
Signed-off-by: Parth Shah <parth@linux.ibm.com>
[rebase]
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
init/init_task.c | 1 +
kernel/sched/core.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/init/init_task.c b/init/init_task.c
index 73cc8f03511a..2afa249c253b 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -78,6 +78,7 @@ struct task_struct init_task
.prio = MAX_PRIO - 20,
.static_prio = MAX_PRIO - 20,
.normal_prio = MAX_PRIO - 20,
+ .latency_nice = 0,
.policy = SCHED_NORMAL,
.cpus_ptr = &init_task.cpus_mask,
.user_cpus_ptr = NULL,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d863d7f6ad7..157eef880d1d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4393,6 +4393,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
*/
p->prio = current->normal_prio;
+
+ /* Propagate the parent's latency requirements to the child as well */
+ p->latency_nice = current->latency_nice;
+
uclamp_fork(p);
[PATCH 3/6]
Main changes:
- Add sched_latency_nice to struct sched_attr, so that it can be modified/queried through the sched_setattr()/sched_getattr() syscalls;
- Add the flag SCHED_FLAG_LATENCY_NICE; whenever a syscall changes sched_latency_nice, the kernel updates the task's latency_nice/latency_prio accordingly (a readback sketch follows this list).
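The read side goes through sched_getattr(). A sketch of the readback, reusing the hand-copied struct sched_attr from the sketch in section 2:

```c
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* struct sched_attr: the hand-copied definition from the sketch in section 2. */

int get_latency_nice(pid_t pid, int *latency_nice)
{
	struct sched_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	ret = syscall(SYS_sched_getattr, pid, &attr, sizeof(attr), 0);
	if (!ret)
		*latency_nice = attr.sched_latency_nice;
	return ret;
}
```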
Signed-off-by: Parth Shah <parth@linux.ibm.com>
[rebase and add a dedicated __setscheduler_latency ]
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
include/uapi/linux/sched.h | 4 +++-
include/uapi/linux/sched/types.h | 19 +++++++++++++++++++
kernel/sched/core.c | 26 ++++++++++++++++++++++++++
tools/include/uapi/linux/sched.h | 4 +++-
4 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..b2e932c25be6 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -132,6 +132,7 @@ struct clone_args {
+#define SCHED_FLAG_LATENCY_NICE 0x80
#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
SCHED_FLAG_KEEP_PARAMS)
@@ -143,6 +144,7 @@ struct clone_args {
SCHED_FLAG_RECLAIM | \
SCHED_FLAG_DL_OVERRUN | \
SCHED_FLAG_KEEP_ALL | \
- SCHED_FLAG_UTIL_CLAMP)
+ SCHED_FLAG_UTIL_CLAMP | \
+ SCHED_FLAG_LATENCY_NICE)
diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index f2c4589d4dbf..0aa4e3b6ed59 100644
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -10,6 +10,7 @@ struct sched_param {
+#define SCHED_ATTR_SIZE_VER2 60 /* add: latency_nice */
/*
* Extended scheduling parameters data structure.
@@ -98,6 +99,22 @@ struct sched_param {
* scheduled on a CPU with no more capacity than the specified value.
*
* A task utilization boundary can be reset by setting the attribute to -1.
+ *
+ * Latency Tolerance Attributes
+ * ===========================
+ *
+ * A subset of sched_attr attributes allows to specify the relative latency
+ * requirements of a task with respect to the other tasks running/queued in the
+ * system.
+ *
+ * @ sched_latency_nice task's latency_nice value
+ *
+ * The latency_nice of a task can have any value in a range of
+ * [LATENCY_NICE_MIN..LATENCY_NICE_MAX].
+ *
+ * A task with latency_nice with the value of LATENCY_NICE_MIN can be
+ * taken for a task with lower latency requirements as opposed to the task with
+ * higher latency_nice.
*/
struct sched_attr {
__u32 size;
@@ -120,6 +137,8 @@ struct sched_attr {
__u32 sched_util_min;
__u32 sched_util_max;
+ /* latency requirement hints */
+ __s32 sched_latency_nice;
};
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 157eef880d1d..3edba1a38ecb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7219,6 +7219,16 @@ static void __setscheduler_params(struct task_struct *p,
p->rt_priority = attr->sched_priority;
p->normal_prio = normal_prio(p);
set_load_weight(p, true);
+
+}
+
+static void __setscheduler_latency(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE) {
+ p->latency_prio = NICE_TO_LATENCY(attr->sched_latency_nice);
+ set_latency_weight(p);
+ }
}
/*
@@ -7345,6 +7355,13 @@ static int __sched_setscheduler(struct task_struct *p,
return retval;
}
+ if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE) {
+ if (attr->sched_latency_nice > MAX_LATENCY_NICE)
+ return -EINVAL;
+ if (attr->sched_latency_nice < MIN_LATENCY_NICE)
+ return -EINVAL;
+ }
+
if (pi)
cpuset_read_lock();
@@ -7379,6 +7396,9 @@ static int __sched_setscheduler(struct task_struct *p,
goto change;
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
goto change;
+ if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE &&
+ attr->sched_latency_nice != p->latency_nice)
+ goto change;
p->sched_reset_on_fork = reset_on_fork;
retval = 0;
@@ -7467,6 +7487,7 @@ static int __sched_setscheduler(struct task_struct *p,
__setscheduler_params(p, attr);
__setscheduler_prio(p, newprio);
}
+ __setscheduler_latency(p, attr);
__setscheduler_uclamp(p, attr);
if (queued) {
@@ -7677,6 +7698,9 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr)
if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
size < SCHED_ATTR_SIZE_VER1)
return -EINVAL;
+ if ((attr->sched_flags & SCHED_FLAG_LATENCY_NICE) &&
+ size < SCHED_ATTR_SIZE_VER2)
+ return -EINVAL;
/*
* XXX: Do we want to be lenient like existing syscalls; or do we want
* to be strict and return an error on out-of-bounds values?
@@ -7914,6 +7938,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
get_params(p, &kattr);
kattr.sched_flags &= SCHED_FLAG_ALL;
+ kattr.sched_latency_nice = p->latency_nice;
+
/*
* This could race with another potential updater, but this is fine
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index 3bac0a8ceab2..ecc4884bfe4b 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -132,6 +132,7 @@ struct clone_args {
+#define SCHED_FLAG_LATENCY_NICE 0x80
#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
SCHED_FLAG_KEEP_PARAMS)
@@ -143,6 +144,7 @@ struct clone_args {
SCHED_FLAG_RECLAIM | \
SCHED_FLAG_DL_OVERRUN | \
SCHED_FLAG_KEEP_ALL | \
- SCHED_FLAG_UTIL_CLAMP)
+ SCHED_FLAG_UTIL_CLAMP | \
+ SCHED_FLAG_LATENCY_NICE)
--
[PATCH 4/6]
Main changes:
- Mirror the nice value system by using CAP_SYS_NICE as the security check: when a non-root user tries to decrease a task's latency_nice, -EPERM is returned (see the sketch below).
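What an unprivileged process would observe, as a sketch (again using the hypothetical set_latency_nice() helper from section 2):

```c
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>

int set_latency_nice(pid_t pid, int latency_nice);	/* sketch from section 2 */

int main(void)
{
	/* Raising the value (becoming less latency-sensitive) is always allowed. */
	if (set_latency_nice(0, 5))
		perror("raise latency_nice");

	/* Lowering it requires CAP_SYS_NICE; expect EPERM otherwise. */
	if (set_latency_nice(0, -5) && errno == EPERM)
		fprintf(stderr, "lowering latency_nice needs CAP_SYS_NICE\n");
	return 0;
}
```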
Signed-off-by: Parth Shah <parth@linux.ibm.com>
[rebase]
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
kernel/sched/core.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3edba1a38ecb..8f8b102a75c4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7360,6 +7360,10 @@ static int __sched_setscheduler(struct task_struct *p,
return -EINVAL;
if (attr->sched_latency_nice < MIN_LATENCY_NICE)
return -EINVAL;
+ /* Use the same security checks as NICE */
+ if (attr->sched_latency_nice < p->latency_nice &&
+ !capable(CAP_SYS_NICE))
+ return -EPERM;
}
if (pi)
--
[PATCH 5/6]
Main changes:
- Convert latency_nice into latency_prio, representing latency sensitivity as a priority, and introduce latency_weight;
- A task with a larger latency_weight (i.e. more latency-sensitive) may preempt the currently running task, provided its own time slice is not used up;
- A task with a small latency_weight may preempt the current task only at wakeup, to preserve a degree of fairness; otherwise it has to wait for the next tick to get a slice to run (a worked example of the weight arithmetic follows this list).
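To get a feel for the numbers, here is a user-space re-implementation of the wakeup_latency_gran() arithmetic from the diff below, under the assumption that NICE_LATENCY_SHIFT is 10 (so NICE_LATENCY_WEIGHT_MAX is 1024, matching the first entry of sched_latency_to_weight) and that sysctl_sched_latency is 24 ms, a plausible value on an 8-CPU machine:

```c
#include <stdio.h>

#define NICE_LATENCY_SHIFT	10			/* assumed: SCHED_FIXEDPOINT_SHIFT */
#define NICE_LATENCY_WEIGHT_MAX	(1 << NICE_LATENCY_SHIFT)	/* 1024 */

static long clamp_long(long v, long lo, long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Mirrors wakeup_latency_gran() from the patch, GENTLE_FAIR_SLEEPERS enabled. */
static long wakeup_latency_gran(long sysctl_sched_latency, long latency_weight)
{
	long thresh = sysctl_sched_latency;

	if (!latency_weight)
		return 0;

	thresh >>= 1;	/* GENTLE_FAIR_SLEEPERS halves the threshold */

	latency_weight = clamp_long(latency_weight,
				    -NICE_LATENCY_WEIGHT_MAX,
				    NICE_LATENCY_WEIGHT_MAX);

	return (thresh * latency_weight) >> NICE_LATENCY_SHIFT;
}

int main(void)
{
	long period = 24000000L;	/* 24 ms in ns */

	/*
	 * Wakee at latency_nice -20 (weight 1024) vs. a current task at 0
	 * (weight 0): vdiff is credited +12 ms, so the wakee may preempt
	 * even if its vruntime lags by up to ~12 ms.
	 */
	printf("%ld ns\n", wakeup_latency_gran(period, 1024 - 0));

	/*
	 * Wakee at +19 (weight -973): vdiff is debited ~11.4 ms, which makes
	 * wakeup preemption much harder for the latency-insensitive task.
	 */
	printf("%ld ns\n", wakeup_latency_gran(period, -973));
	return 0;
}
```

Note how the clamp keeps the credit/debit within one (halved) scheduler period, so the latency hint biases wakeup preemption but can never dominate vruntime fairness entirely.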
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
include/linux/sched.h | 4 ++-
init/init_task.c | 2 +-
kernel/sched/core.c | 32 +++++++++++++++++++----
kernel/sched/debug.c | 2 +-
kernel/sched/fair.c | 60 +++++++++++++++++++++++++++++++++++++++++--
kernel/sched/sched.h | 12 +++++++++
6 files changed, 102 insertions(+), 10 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2aa889a59054..9aeb157e819b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -560,6 +560,8 @@ struct sched_entity {
unsigned long runnable_weight;
+ int latency_weight;
+
/*
* Per entity load average tracking.
@@ -779,7 +781,7 @@ struct task_struct {
int static_prio;
int normal_prio;
unsigned int rt_priority;
- int latency_nice;
+ int latency_prio;
struct sched_entity se;
struct sched_rt_entity rt;
diff --git a/init/init_task.c b/init/init_task.c
index 2afa249c253b..e98c71f24981 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -78,7 +78,7 @@ struct task_struct init_task
.prio = MAX_PRIO - 20,
.static_prio = MAX_PRIO - 20,
.normal_prio = MAX_PRIO - 20,
- .latency_nice = 0,
+ .latency_prio = NICE_WIDTH - 20,
.policy = SCHED_NORMAL,
.cpus_ptr = &init_task.cpus_mask,
.user_cpus_ptr = NULL,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8f8b102a75c4..547b0da01efe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1241,6 +1241,11 @@ static void set_load_weight(struct task_struct *p, bool update_load)
}
}
+static void set_latency_weight(struct task_struct *p)
+{
+ p->se.latency_weight = sched_latency_to_weight[p->latency_prio];
+}
+
/*
* Serializes updates of utilization clamp values
@@ -4394,7 +4399,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->prio = current->normal_prio;
/* Propagate the parent's latency requirements to the child as well */
- p->latency_nice = current->latency_nice;
+ p->latency_prio = current->latency_prio;
uclamp_fork(p);
@@ -4412,7 +4417,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->prio = p->normal_prio = p->static_prio;
set_load_weight(p, false);
- p->latency_nice = DEFAULT_LATENCY_NICE;
+ p->latency_prio = NICE_TO_LATENCY(0);
/*
* We don't need the reset flag anymore after the fork. It has
* fulfilled its duty:
@@ -4420,6 +4425,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->sched_reset_on_fork = 0;
}
+ /* Once latency_prio is set, update the latency weight */
+ set_latency_weight(p);
+
if (dl_prio(p->prio))
return -EAGAIN;
else if (rt_prio(p->prio))
@@ -7361,7 +7369,7 @@ static int __sched_setscheduler(struct task_struct *p,
if (attr->sched_latency_nice < MIN_LATENCY_NICE)
return -EINVAL;
/* Use the same security checks as NICE */
- if (attr->sched_latency_nice < p->latency_nice &&
+ if (attr->sched_latency_nice < LATENCY_TO_NICE(p->latency_prio) &&
!capable(CAP_SYS_NICE))
return -EPERM;
}
@@ -7401,7 +7409,7 @@ static int __sched_setscheduler(struct task_struct *p,
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
goto change;
if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE &&
- attr->sched_latency_nice != p->latency_nice)
+ attr->sched_latency_nice != LATENCY_TO_NICE(p->latency_prio))
goto change;
p->sched_reset_on_fork = reset_on_fork;
@@ -7942,7 +7950,7 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
get_params(p, &kattr);
kattr.sched_flags &= SCHED_FLAG_ALL;
- kattr.sched_latency_nice = p->latency_nice;
+ kattr.sched_latency_nice = LATENCY_TO_NICE(p->latency_prio);
/*
@@ -10954,6 +10962,20 @@ const u32 sched_prio_to_wmult[40] = {
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
+/*
+ * latency weight for wakeup preemption
+ */
+const int sched_latency_to_weight[40] = {
+ /* -20 */ 1024, 973, 922, 870, 819,
+ /* -15 */ 768, 717, 666, 614, 563,
+ /* -10 */ 512, 461, 410, 358, 307,
+ /* -5 */ 256, 205, 154, 102, 51,
+ /* 0 */ 0, -51, -102, -154, -205,
+ /* 5 */ -256, -307, -358, -410, -461,
+ /* 10 */ -512, -563, -614, -666, -717,
+ /* 15 */ -768, -819, -870, -922, -973,
+};
+
void call_trace_sched_update_nr_running(struct rq *rq, int count)
{
trace_sched_update_nr_running_tp(rq, count);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 5d76a8927888..253e52ec73fb 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1043,7 +1043,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(policy);
P(prio);
- P(latency_nice);
+ P(latency_prio);
if (task_has_dl_policy(p)) {
P(dl.runtime);
P(dl.deadline);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5c4bfffe8c2c..506c482a0e48 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5555,6 +5555,35 @@ static int sched_idle_cpu(int cpu)
}
+static void set_next_buddy(struct sched_entity *se);
+
+static void check_preempt_from_idle(struct cfs_rq *cfs, struct sched_entity *se)
+{
+ struct sched_entity *next;
+
+ if (se->latency_weight <= 0)
+ return;
+
+ if (cfs->nr_running <= 1)
+ return;
+ /*
+ * When waking from idle, we don't need to check to preempt at wakeup
+ * the idle thread and don't set next buddy as a candidate for being
+ * picked in priority.
+ * In case of simultaneous wakeup from idle, the latency sensitive tasks
+ * lost opportunity to preempt non sensitive tasks which woke up
+ * simultaneously.
+ */
+
+ if (cfs->next)
+ next = cfs->next;
+ else
+ next = __pick_first_entity(cfs);
+
+ if (next && wakeup_preempt_entity(next, se) == 1)
+ set_next_buddy(se);
+}
+
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
@@ -5648,6 +5677,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!task_new)
update_overutilized_status(rq);
+ if (rq->curr == rq->idle)
+ check_preempt_from_idle(cfs_rq_of(&p->se), &p->se);
+
enqueue_throttle:
if (cfs_bandwidth_used()) {
/*
@@ -5669,8 +5701,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
hrtick_update(rq);
}
-static void set_next_buddy(struct sched_entity *se);
-
/*
* The dequeue_task method is called before nr_running is
* decreased. We remove the task from the rbtree and
@@ -6970,6 +7000,27 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
+static long wakeup_latency_gran(int latency_weight)
+{
+ long thresh = sysctl_sched_latency;
+
+ if (!latency_weight)
+ return 0;
+
+ if (sched_feat(GENTLE_FAIR_SLEEPERS))
+ thresh >>= 1;
+
+ /*
+ * Clamp the delta to stay in the scheduler period range
+ * [-sysctl_sched_latency:sysctl_sched_latency]
+ */
+ latency_weight = clamp_t(long, latency_weight,
+ -1 * NICE_LATENCY_WEIGHT_MAX,
+ NICE_LATENCY_WEIGHT_MAX);
+
+ return (thresh * latency_weight) >> NICE_LATENCY_SHIFT;
+}
+
static unsigned long wakeup_gran(struct sched_entity *se)
{
unsigned long gran = sysctl_sched_wakeup_granularity;
@@ -7008,6 +7059,10 @@ static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
s64 gran, vdiff = curr->vruntime - se->vruntime;
+ int latency_weight = se->latency_weight - curr->latency_weight;
+
+ latency_weight = min(latency_weight, se->latency_weight);
+ vdiff += wakeup_latency_gran(latency_weight);
if (vdiff <= 0)
return -1;
@@ -7117,6 +7172,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
return;
update_curr(cfs_rq_of(se));
+
if (wakeup_preempt_entity(se, pse) == 1) {
/*
* Bias pick_next to pick the sched entity that is
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 456ad2159eb1..dd92aa9c36f9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -122,6 +122,17 @@ extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
* Default tasks should be treated as a task with latency_nice = 0.
*/
+#define DEFAULT_LATENCY_PRIO (DEFAULT_LATENCY_NICE + LATENCY_NICE_WIDTH/2)
+
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static latency [ 0..39 ],
+ * and back.
+ */
+#define NICE_TO_LATENCY(nice) ((nice) + DEFAULT_LATENCY_PRIO)
+#define LATENCY_TO_NICE(prio) ((prio) - DEFAULT_LATENCY_PRIO)
+#define NICE_LATENCY_SHIFT (SCHED_FIXEDPOINT_SHIFT)
+#define NICE_LATENCY_WEIGHT_MAX (1 << NICE_LATENCY_SHIFT)
/*
* Increase resolution of nice-level calculations for 64-bit architectures.
@@ -2098,6 +2109,7 @@ static_assert(WF_TTWU == SD_BALANCE_WAKE);
extern const int sched_prio_to_weight[40];
extern const u32 sched_prio_to_wmult[40];
+extern const int sched_latency_to_weight[40];
/*
* {de,en}queue flags:
--
[PATCH 6/6]
Main changes:
- With group scheduling, a task can set its own latency_prio so that it may preempt the current task, while its task group keeps its original latency priority; this is implemented by adding a latency field to the task group (a usage sketch follows).
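Once these files exist, the group priority can be driven like any other cpu-controller knob. A sketch using cgroup v2 (the group path "/sys/fs/cgroup/fast" is made up for the example; cpu.latency.nice takes the [-20, 19] nice-style value, cpu.latency the raw latency_prio):

```c
#include <stdio.h>

/* Write a single integer to a cgroup control file. */
static int cg_write(const char *path, long val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%ld\n", val);
	return fclose(f);
}

int main(void)
{
	/* Make every task in the "fast" group maximally latency-sensitive. */
	if (cg_write("/sys/fs/cgroup/fast/cpu.latency.nice", -20))
		perror("cpu.latency.nice");
	return 0;
}
```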
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
kernel/sched/core.c | 41 +++++++++++++++++++++++++++++++++++++++++
kernel/sched/fair.c | 32 ++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 4 ++++
3 files changed, 77 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 547b0da01efe..e0668652dd24 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10635,6 +10635,30 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
{
return sched_group_set_idle(css_tg(css), idle);
}
+
+static s64 cpu_latency_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ return css_tg(css)->latency_prio;
+}
+
+static int cpu_latency_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 latency_prio)
+{
+ return sched_group_set_latency(css_tg(css), latency_prio);
+}
+
+static s64 cpu_latency_nice_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ return LATENCY_TO_NICE(css_tg(css)->latency_prio);
+}
+
+static int cpu_latency_nice_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 latency_nice)
+{
+ return sched_group_set_latency(css_tg(css), NICE_TO_LATENCY(latency_nice));
+}
static struct cftype cpu_legacy_files[] = {
@@ -10649,6 +10673,11 @@ static struct cftype cpu_legacy_files[] = {
.read_s64 = cpu_idle_read_s64,
.write_s64 = cpu_idle_write_s64,
},
+ {
+ .name = "latency",
+ .read_s64 = cpu_latency_read_s64,
+ .write_s64 = cpu_latency_write_s64,
+ },
{
@@ -10866,6 +10895,18 @@ static struct cftype cpu_files[] = {
.read_s64 = cpu_idle_read_s64,
.write_s64 = cpu_idle_write_s64,
},
+ {
+ .name = "latency",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_s64 = cpu_latency_read_s64,
+ .write_s64 = cpu_latency_write_s64,
+ },
+ {
+ .name = "latency.nice",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_s64 = cpu_latency_nice_read_s64,
+ .write_s64 = cpu_latency_nice_write_s64,
+ },
{
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 506c482a0e48..cbccef025089 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11496,6 +11496,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
goto err;
tg->shares = NICE_0_LOAD;
+ tg->latency_prio = DEFAULT_LATENCY_PRIO;
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
@@ -11594,6 +11595,7 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
}
se->my_q = cfs_rq;
+ se->latency_weight = sched_latency_to_weight[tg->latency_prio];
/* guarantee group entities always have weight */
update_load_set(&se->load, NICE_0_LOAD);
se->parent = parent;
@@ -11724,6 +11726,36 @@ int sched_group_set_idle(struct task_group *tg, long idle)
return 0;
}
+int sched_group_set_latency(struct task_group *tg, long latency_prio)
+{
+ int i;
+
+ if (tg == &root_task_group)
+ return -EINVAL;
+
+ if (latency_prio < 0 ||
+ latency_prio > LATENCY_NICE_WIDTH)
+ return -EINVAL;
+
+ mutex_lock(&shares_mutex);
+
+ if (tg->latency_prio == latency_prio) {
+ mutex_unlock(&shares_mutex);
+ return 0;
+ }
+
+ tg->latency_prio = latency_prio;
+
+ for_each_possible_cpu(i) {
+ struct sched_entity *se = tg->se[i];
+
+ WRITE_ONCE(se->latency_weight, sched_latency_to_weight[latency_prio]);
+ }
+
+ mutex_unlock(&shares_mutex);
+ return 0;
+}
+
void free_fair_sched_group(struct task_group *tg) { }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dd92aa9c36f9..885d1c809329 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -429,6 +429,8 @@ struct task_group {
/* A positive value indicates that this is a SCHED_IDLE group. */
int idle;
+ /* latency priority of the group. */
+ int latency_prio;
/*
@@ -542,6 +544,8 @@ extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern int sched_group_set_idle(struct task_group *tg, long idle);
+extern int sched_group_set_latency(struct task_group *tg, long latency);
+
extern void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next);
--
4. Summary
The latency-nice value is very similar to the existing nice value system; in my view, latency-nice is a complementary remedy for Linux's relatively weak real-time behavior. The approach is also a valuable reference: if your customized system needs strict real-time response for a certain class of workloads, you can follow the latency-nice approach to build the priority scheme you need.