[系统运维] Linux-进程调度（CFS）

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 系统运维 -> Linux-进程调度（CFS） -> 正文阅读

[系统运维]Linux-进程调度（CFS）

比较早的调度策略：基于优先级的时间片调度等，这些策略影响调度的因素单一，效果不佳；CFS 基于优先级和负载调度策略，效果较优。

linux 4.19 中定义了6中调度策略算法；

kernel-4.19/include/uapi/linux/sched.h
kernel-4.19/include/linux/sched.h

/*
?* Scheduling policies
?*/
#define SCHED_NORMAL?? ??? ?0
#define SCHED_FIFO?? ??? ?1
#define SCHED_RR?? ??? ?2
#define SCHED_BATCH?? ??? ?3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE?? ??? ?5
#define SCHED_DEADLINE?? ??? ?6

struct task_struct {
?? ?int?? ??? ??? ??? ?prio;
?? ?int?? ??? ??? ??? ?static_prio;
?? ?int?? ??? ??? ??? ?normal_prio;
?? ?unsigned int?? ??? ??? ?rt_priority;

? ? ? ? ? ? ? ? ? ? ? ? .policy?? ??? ?= SCHED_NORMAL,

?? ?const struct sched_class?? ?*sched_class;
?? ?struct sched_entity?? ??? ?se;
?? ?struct sched_rt_entity?? ??? ?rt;

}

struct sched_class {
?? ?const struct sched_class *next;

#ifdef CONFIG_UCLAMP_TASK
?? ?int uclamp_enabled;
#endif

?? ?void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
?? ?void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);

}

CFS 调度类
kernel-4.19/kernel/sched/fair.c

11957/*
11958 * All the scheduling class methods:
11959 */
11960const struct sched_class fair_sched_class = {
11961?? ?.next?? ??? ??? ?= &idle_sched_class,
11962?? ?.enqueue_task?? ??? ?= enqueue_task_fair,
11963?? ?.dequeue_task?? ??? ?= dequeue_task_fair,
?

}

调度类通过next 指针链接，优先级? stop_sched_class >?dl_sched_class >?rt_sched_class>fair_sched_class>idle_sched_class>null ，用户空间通过sched_set/getScheduler 获取和设置

SCHED_NORMAL、SCHED_BATCH?????属于cfs??
SCHED_FIFO、SCHED_RR?????属于rt

1821#ifdef CONFIG_SMP
1822#define sched_class_highest (&stop_sched_class)
1823#else
1824#define sched_class_highest (&dl_sched_class)
1825#endif
1826#define for_each_class(class) \
1827 ? for (class = sched_class_highest; class; class = class->next)

任务优先级

struct task_struct {
?? ?int?? ??? ??? ??? ?prio;
?? ?int?? ??? ??? ??? ?static_prio;
?? ?int?? ??? ??? ??? ?normal_prio;
?? ?unsigned int?? ??? ??? ?rt_priority;

}

内核使用 0~139 表示进程优先级，0~99 rt 优先级，100~139给普通进程使用；另外用户空间优先级 nice -20~19??映射到普通进程优先级 100~139。

static_prio 是进程启动时分配的，内核使用，NICE_TO_PRIO() 可以将nice 和内核优先级转换，用户可以通过nice 或sched_setscheduler 来改变。

rt_priority 实时进程优先级。

normal_prio 是基于static_prio 和调度策略计算出来的，创建进程时会继承父进程。对应普通进程static_prio 和normal_prio 相等，对于实时进程会根据rt_priority 重新计算normal_prio（effective_prio()函数实现）

prio 对应动态优先级，有时需要暂时提供进程优先级。

调度实体权重

不同的调度类对应不同的调度实体，对应load_weight 对应调度权重。

struct task_struct {

struct sched_entity?? ??? ?se;
?struct sched_rt_entity?? ??? ?rt;

}

484struct sched_entity {
485?? ?/* For load-balancing: */
486?? ?struct load_weight?? ??? ?load;
487?? ?unsigned long?? ??? ??? ?runnable_weight;
488?? ?struct rb_node?? ??? ??? ?run_node;
489?? ?struct list_head?? ??? ?group_node;
490?? ?unsigned int?? ??? ??? ?on_rq;
}

360struct load_weight {
361?? ?unsigned long?? ??? ??? ?weight;
362?? ?u32?? ??? ??? ??? ?inv_weight;
363};
364

kernel-4.19/kernel/sched/core.c

8383/*
8384 * Nice levels are multiplicative, with a gentle 10% change for every
8385 * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
8386 * nice 1, it will get ~10% less CPU time than another CPU-bound task
8387 * that remained on nice 0.
8388 *
8389 * The "10% effect" is relative and cumulative: from _any_ nice level,
8390 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
8391 * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
8392 * If a task goes up by ~10% and another task goes down by ~10% then
8393 * the relative distance between them is ~25%.)
8394 */
8395const int sched_prio_to_weight[40] = {
8396 /* -20 */ ? ? 88761, ? ? 71755, ? ? 56483, ? ? 46273, ? ? 36291,
8397 /* -15 */ ? ? 29154, ? ? 23254, ? ? 18705, ? ? 14949, ? ? 11916,
8398 /* -10 */ ? ? ?9548, ? ? ?7620, ? ? ?6100, ? ? ?4904, ? ? ?3906,
8399 /* ?-5 */ ? ? ?3121, ? ? ?2501, ? ? ?1991, ? ? ?1586, ? ? ?1277,
8400 /* ? 0 */ ? ? ?1024, ? ? ? 820, ? ? ? 655, ? ? ? 526, ? ? ? 423,
8401 /* ? 5 */ ? ? ? 335, ? ? ? 272, ? ? ? 215, ? ? ? 172, ? ? ? 137,
8402 /* ?10 */ ? ? ? 110, ? ? ? ?87, ? ? ? ?70, ? ? ? ?56, ? ? ? ?45,
8403 /* ?15 */ ? ? ? ?36, ? ? ? ?29, ? ? ? ?23, ? ? ? ?18, ? ? ? ?15,
8404};
8405
8406/*
8407 * Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated.
8408 *
8409 * In cases where the weight does not change often, we can use the
8410 * precalculated inverse to speed up arithmetics by turning divisions
8411 * into multiplications:
8412 */
8413const u32 sched_prio_to_wmult[40] = {
8414 /* -20 */ ? ? 48388, ? ? 59856, ? ? 76040, ? ? 92818, ? ?118348,
8415 /* -15 */ ? ?147320, ? ?184698, ? ?229616, ? ?287308, ? ?360437,
8416 /* -10 */ ? ?449829, ? ?563644, ? ?704093, ? ?875809, ? 1099582,
8417 /* ?-5 */ ? 1376151, ? 1717300, ? 2157191, ? 2708050, ? 3363326,
8418 /* ? 0 */ ? 4194304, ? 5237765, ? 6557202, ? 8165337, ?10153587,
8419 /* ? 5 */ ?12820798, ?15790321, ?19976592, ?24970740, ?31350126,
8420 /* ?10 */ ?39045157, ?49367440, ?61356676, ?76695844, ?95443717,
8421 /* ?15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
8422};

sched_prio_to_weight? 里面将用户空间nice 优先级 -20~19 对应cpu 执行时间权重映射，nice =0 对应 1024 ，nice 每差一个优先级，cpu 时间就相应相差 10% ；nice 对应权重约以1.25 比例增加。

A进程nice 0（权重1024），B进程nice0（权重1024），则A 、B的CPU时间，1024/(1024+1024) = 50%

A进程nice 0（权重1024），B进程nice1（权重820），则B的CPU时间，820/(1024+820) = 45%，A 的cpu 时间 55%

这里变化一个nice 优先级，CPU时间就相差 10%?

sched_prio_to_wmult[n] = (1/sched_prio_to_weight[n]) <<32?

718static void set_load_weight(struct task_struct *p, bool update_load)
719{
720?? ?int prio = p->static_prio - MAX_RT_PRIO;
721?? ?struct load_weight *load = &p->se.load;
722
723?? ?/*
724?? ? * SCHED_IDLE tasks get minimal weight:
725?? ? */
726?? ?if (idle_policy(p->policy)) {
727?? ??? ?load->weight = scale_load(WEIGHT_IDLEPRIO);
728?? ??? ?load->inv_weight = WMULT_IDLEPRIO;
729?? ??? ?p->se.runnable_weight = load->weight;
730?? ??? ?return;
731?? ?}
732
733?? ?/*
734?? ? * SCHED_OTHER tasks have to update their load when changing their
735?? ? * weight
736?? ? */
737?? ?if (update_load && p->sched_class == &fair_sched_class) {
738?? ??? ?reweight_task(p, prio);
739?? ?} else {
740?? ??? ?load->weight = scale_load(sched_prio_to_weight[prio]);
741?? ??? ?load->inv_weight = sched_prio_to_wmult[prio];
742?? ??? ?p->se.runnable_weight = load->weight;
743?? ?}
744}

系统运维最新文章

配置小型公司网络WLAN基本业务（AC通过三层

如何用DWDM射频光纤技术实现200公里外的站点

国内顺畅下载k8s.gcr.io的镜像

自动化测试appium

ctfshow ssrf

Linux操作系统学习之实用指令（Centos7/8均

加:2022-07-03 11:10:24 更:2022-07-03 11:12:13

360图书馆购物三丰科技阅读网日历万年历 2025年7日历

-2025/7/19 12:19:42-

图片自动播放器
↓图片自动播放器↓

TxT小说阅读器
↓语音阅读,小说下载,古典文学↓

一键清除垃圾
↓轻轻一点,清除系统垃圾↓

图片批量下载器
↓批量下载图片,美女图库↓

网站联系: qq:121756557 email:121756557@qq.com IT数码