Per-process proc files
Each process has its proc information under /proc/${pid}/. /proc/self is special: it is a symlink that always resolves to whichever process opens it.
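A quick way to see this is to read the /proc/self link from a program; a minimal sketch (error handling trimmed):

#include <stdio.h>
#include <unistd.h>
#include <limits.h>

int main(void)
{
	char buf[PATH_MAX];
	/* /proc/self is a symlink; it resolves to the PID of whoever reads it */
	ssize_t n = readlink("/proc/self", buf, sizeof(buf) - 1);
	if (n < 0)
		return 1;
	buf[n] = '\0';
	printf("/proc/self -> %s, my pid = %d\n", buf, getpid());
	return 0;
}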
1) Memory:
- status: virtual memory and physical memory (RSS) usage of the process
- statm: memory usage counters;
- maps: the process's VMAs and the shared libraries it has mapped (mappings are either file-backed or anonymous);
- map_files: the files backing each mapping;
- smaps: an extension of maps with per-VMA detail, e.g. the physical memory actually backing the heap;
- oom_*: OOM score and adjustment;
2) Scheduling
3) File I/O
4) System and status
Kernel side
The per-process proc files are mostly defined in fs/proc/base.c.
The array tid_base_stuff lists the per-thread files; tgid_base_stuff lists the per-process (thread-group) files.
Each entry describes one proc file; a directory entry needs both an inode_operations and a file_operations.
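For reference, the pid_entry structure and the NOD/DIR/REG/ONE/LNK helper macros in fs/proc/base.c look roughly like this (a sketch; the exact layout varies slightly across kernel versions):

struct pid_entry {
	const char *name;
	unsigned int len;
	umode_t mode;
	const struct inode_operations *iop;
	const struct file_operations *fop;
	union proc_op op;
};

#define NOD(NAME, MODE, IOP, FOP, OP) {		\
	.name = (NAME),				\
	.len  = sizeof(NAME) - 1,		\
	.mode = MODE,				\
	.iop  = IOP,				\
	.fop  = FOP,				\
	.op   = OP,				\
}

#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {})
#define LNK(NAME, get_link)		\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),	\
		&proc_pid_link_inode_operations, NULL,	\
		{ .proc_get_link = get_link })
#define REG(NAME, MODE, fops)		\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
#define ONE(NAME, MODE, show)		\
	NOD(NAME, (S_IFREG|(MODE)),	\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show })

So a REG entry supplies its own file_operations, while a ONE entry only provides a show callback that is wrapped by proc_single_file_operations.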
static const struct pid_entry tid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
REG("auxv", S_IRUSR, proc_auxv_operations),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUSR, proc_pid_personality),
ONE("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
NOD("comm", S_IFREG|S_IRUGO|S_IWUSR,
&proc_tid_comm_inode_operations,
&proc_pid_set_comm_operations, {}),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
ONE("syscall", S_IRUSR, proc_pid_syscall),
#endif
REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_PROC_CHILDREN
REG("children", S_IRUGO, proc_tid_children_operations),
#endif
#ifdef CONFIG_NUMA
REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
#endif
REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
LNK("cwd", proc_cwd_link),
LNK("root", proc_root_link),
LNK("exe", proc_exe_link),
REG("mounts", S_IRUGO, proc_mounts_operations),
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
ONE("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHED_INFO
ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
REG("latency", S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
ONE("cpuset", S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
ONE("cgroup", S_IRUGO, proc_cgroup_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
REG("fail-nth", 0644, proc_fail_nth_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
ONE("io", S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_USER_NS
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
};
Let's trace from the initialization of the proc root to see how these files are created.
The proc root is initialized by proc_root_init() <fs/proc/root.c>:
void __init proc_root_init(void)
{
proc_init_kmemcache();
set_proc_pid_nlink();
proc_self_init();
proc_thread_self_init();
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
proc_mkdir("fs", NULL);
proc_mkdir("driver", NULL);
proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
/* just give it a mountpoint */
proc_create_mount_point("openprom");
#endif
proc_tty_init();
proc_mkdir("bus", NULL);
proc_sys_init();
register_filesystem(&proc_fs_type);
}
The root directory entry proc_root:
struct proc_dir_entry proc_root = {
.low_ino = PROC_ROOT_INO,
.namelen = 5,
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
.nlink = 2,
.refcnt = REFCOUNT_INIT(1),
.proc_iops = &proc_root_inode_operations,
.proc_fops = &proc_root_operations,
.parent = &proc_root,
.subdir = RB_ROOT,
.name = "/proc",
};
The root inode's lookup operation is proc_root_lookup, which resolves names under /proc:
static const struct inode_operations proc_root_inode_operations = {
.lookup = proc_root_lookup,
.getattr = proc_root_getattr,
};
proc_root_lookup creates the per-pid directories; the call chain is:
proc_root_lookup
  -> proc_pid_lookup
    -> proc_pid_instantiate
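proc_root_lookup itself is small: it first tries to resolve the name as a PID directory and falls back to the registered proc_dir_entry tree. Roughly (a sketch based on a 4.x-era kernel; later kernels changed the proc_pid_lookup signature):

static struct dentry *proc_root_lookup(struct inode *dir, struct dentry *dentry,
				       unsigned int flags)
{
	/* first try "/proc/<pid>": succeeds if the name is a live PID */
	if (!proc_pid_lookup(dir, dentry, flags))
		return NULL;

	/* otherwise fall back to the registered proc_dir_entry tree */
	return proc_lookup(dir, dentry, flags);
}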
proc_pid_instantiate creates the /proc/<pid> directory inode and sets its inode_operations and file_operations to proc_tgid_base_inode_operations and proc_tgid_base_operations.
static struct dentry *proc_pid_instantiate(struct dentry * dentry,
struct task_struct *task, const void *ptr)
{
struct inode *inode;
inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode)
return ERR_PTR(-ENOENT);
inode->i_op = &proc_tgid_base_inode_operations;
inode->i_fop = &proc_tgid_base_operations;
inode->i_flags|=S_IMMUTABLE;
set_nlink(inode, nlink_tgid);
pid_update_inode(task, inode);
d_set_d_op(dentry, &pid_dentry_operations);
return d_splice_alias(inode, dentry);
}
Both the lookup operation in proc_tgid_base_inode_operations and the iterate_shared operation in proc_tgid_base_operations walk the tgid_base_stuff array.
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
{
return proc_pident_readdir(file, ctx,
tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}
static const struct file_operations proc_tgid_base_operations = {
.read = generic_read_dir,
.iterate_shared = proc_tgid_base_readdir,
.llseek = generic_file_llseek,
};
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}
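proc_pident_lookup simply scans the table by name and instantiates the matching entry; a sketch (argument types differ between kernel versions, which pass either a count or an end pointer):

static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry,
					 const struct pid_entry *ents, unsigned int nents)
{
	struct task_struct *task = get_proc_task(dir);
	const struct pid_entry *p, *last;
	struct dentry *res = ERR_PTR(-ENOENT);

	if (!task)
		goto out_no_task;

	/* linear scan of tgid_base_stuff / tid_base_stuff by name */
	last = &ents[nents];
	for (p = ents; p < last; p++) {
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len))
			break;
	}
	if (p >= last)
		goto out;

	/* create the inode and wire up p->iop / p->fop for this file */
	res = proc_pident_instantiate(dentry, task, p);
out:
	put_task_struct(task);
out_no_task:
	return res;
}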
Implementation of the key files
Memory
maps lists the process's mappings, both anonymous and file-backed; each line corresponds to one VMA in the kernel, with its start and end address, permissions and backing file:
56018dfba000-56018e0c2000 r-xp 00000000 08:01 2359769 /usr/bin/bash
56018e2c1000-56018e2c5000 r--p 00107000 08:01 2359769 /usr/bin/bash
56018e2c5000-56018e2ce000 rw-p 0010b000 08:01 2359769 /usr/bin/bash
56018e2ce000-56018e2d8000 rw-p 00000000 00:00 0
56018efcc000-56018f027000 rw-p 00000000 00:00 0 [heap]
7f93f9542000-7f93f954d000 r-xp 00000000 08:01 2362059 /usr/lib64/libnss_files-2.28.so
......
7f93fa174000-7f93fa175000 rw-p 00000000 00:00 0
7fffc82fa000-7fffc831b000 rw-p 00000000 00:00 0 [stack]
7fffc83a1000-7fffc83a3000 r--p 00000000 00:00 0 [vvar]
7fffc83a3000-7fffc83a5000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
The memory-related files are implemented in fs/proc/task_mmu.c.
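Each maps line (start-end, permissions, offset, device, inode) is printed per VMA; the formatting looks roughly like this sketch of show_vma_header_prefix() from recent kernels:

static void show_vma_header_prefix(struct seq_file *m,
				   unsigned long start, unsigned long end,
				   vm_flags_t flags, unsigned long long pgoff,
				   dev_t dev, unsigned long ino)
{
	/* "start-end" */
	seq_put_hex_ll(m, NULL, start, 8);
	seq_put_hex_ll(m, "-", end, 8);
	seq_putc(m, ' ');
	/* "rwxp" / "rwxs": permissions, private vs. shared */
	seq_putc(m, flags & VM_READ ? 'r' : '-');
	seq_putc(m, flags & VM_WRITE ? 'w' : '-');
	seq_putc(m, flags & VM_EXEC ? 'x' : '-');
	seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
	/* file offset, device major:minor, inode number */
	seq_put_hex_ll(m, " ", pgoff, 8);
	seq_put_hex_ll(m, " ", MAJOR(dev), 2);
	seq_put_hex_ll(m, " ", MINOR(dev), 2);
	seq_put_decimal_ull(m, " ", ino);
	seq_putc(m, ' ');
}

The final column, the file path or a [heap]/[stack]/[vdso] label, is appended afterwards by show_map_vma().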
smaps is an extension of maps: for each VMA it also reports the physical memory behind it (RSS) and its proportional share (PSS):
7fffc83a3000-7fffc83a5000 r-xp 00000000 00:00 0 [vdso]
Size: 8 kB
Rss: 4 kB
Pss: 0 kB
Shared_Clean: 4 kB
Shared_Dirty: 0 kB
Private_Clean: 0 kB
Private_Dirty: 0 kB
Referenced: 4 kB
Anonymous: 0 kB
AnonHugePages: 0 kB
ShmemPmdMapped: 0 kB
Shared_Hugetlb: 0 kB
Private_Hugetlb: 0 kB
Swap: 0 kB
SwapPss: 0 kB
KernelPageSize: 4 kB
MMUPageSize: 4 kB
Locked: 0 kB
VmFlags: rd ex mr mw me de
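Pss divides each shared page by the number of processes mapping it, while Shared_*/Private_* are split on the page's map count. A simplified sketch of smaps_account() in fs/proc/task_mmu.c (THP and swap handling omitted; details vary by kernel version):

static void smaps_account(struct mem_size_stats *mss, struct page *page,
			  bool compound, bool young, bool dirty)
{
	int i, nr = compound ? 1 << compound_order(page) : 1;
	unsigned long size = nr * PAGE_SIZE;

	if (PageAnon(page))
		mss->anonymous += size;
	mss->resident += size;			/* Rss */

	for (i = 0; i < nr; i++, page++) {
		int mapcount = page_mapcount(page);

		if (mapcount >= 2) {
			/* shared with other processes: Pss gets 1/mapcount of the page */
			if (dirty || PageDirty(page))
				mss->shared_dirty += PAGE_SIZE;
			else
				mss->shared_clean += PAGE_SIZE;
			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
		} else {
			/* mapped only by this process: Pss gets the whole page */
			if (dirty || PageDirty(page))
				mss->private_dirty += PAGE_SIZE;
			else
				mss->private_clean += PAGE_SIZE;
			mss->pss += PAGE_SIZE << PSS_SHIFT;
		}
	}
}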
statm is a one-line summary of memory usage, in pages:
[root@a82dc075718f self]# cat statm
3031 824 681 264 0 165 0
The fields are produced by proc_pid_statm:
int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
struct mm_struct *mm = get_task_mm(task);
if (mm) {
size = task_statm(mm, &shared, &text, &data, &resident);
mmput(mm);
}
seq_put_decimal_ull(m, "", size);
seq_put_decimal_ull(m, " ", resident);
seq_put_decimal_ull(m, " ", shared);
seq_put_decimal_ull(m, " ", text);
seq_put_decimal_ull(m, " ", 0);
seq_put_decimal_ull(m, " ", data);
seq_put_decimal_ull(m, " ", 0);
seq_putc(m, '\n');
return 0;
}
size: total virtual memory;
resident: resident (physical) memory;
shared: resident shared pages (file-backed, e.g. shared libraries);
text: code (text) segment;
data: data segments plus stack.
(All values are in pages; the two zero columns are unused.)
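Since statm reports pages, multiply by the page size to get bytes; a small user-space sketch:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned long size, resident, shared, text, lib, data, dt;
	long page = sysconf(_SC_PAGESIZE);
	FILE *f = fopen("/proc/self/statm", "r");

	if (!f)
		return 1;
	/* size resident shared text lib data dt -- lib and dt are always 0 */
	if (fscanf(f, "%lu %lu %lu %lu %lu %lu %lu",
		   &size, &resident, &shared, &text, &lib, &data, &dt) != 7) {
		fclose(f);
		return 1;
	}
	fclose(f);

	printf("VmSize %lu kB, VmRSS %lu kB\n",
	       size * page / 1024, resident * page / 1024);
	return 0;
}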
status is a human-readable summary covering memory, signals, capabilities and more:
[root@a82dc075718f self]# cat status
Name: bash
Umask: 0022
State: S (sleeping)
Tgid: 1
Ngid: 0
Pid: 1
PPid: 0
TracerPid: 0
Uid: 0 0 0 0
Gid: 0 0 0 0
FDSize: 256
Groups:
NStgid: 1
NSpid: 1
NSpgid: 1
NSsid: 1
VmPeak: 12160 kB
VmSize: 12124 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 3296 kB
VmRSS: 3296 kB
RssAnon: 572 kB
RssFile: 2724 kB
RssShmem: 0 kB
VmData: 528 kB
VmStk: 132 kB
VmExe: 1056 kB
VmLib: 2156 kB
VmPTE: 52 kB
VmPMD: 12 kB
VmSwap: 0 kB
HugetlbPages: 0 kB
Threads: 1
SigQ: 0/7864
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000010000
SigIgn: 0000000000380004
SigCgt: 000000004b817efb
CapInh: 00000000a80425fb
CapPrm: 00000000a80425fb
CapEff: 00000000a80425fb
CapBnd: 00000000a80425fb
CapAmb: 0000000000000000
Seccomp: 2
Speculation_Store_Bypass: thread force mitigated
Cpus_allowed: 00000000,00000000,00000000,00000003
Cpus_allowed_list: 0-1
Mems_allowed: 1
Mems_allowed_list: 0
voluntary_ctxt_switches: 1116
nonvoluntary_ctxt_switches: 76
VmSize: total virtual memory;
VmRSS: physical memory; shared pages are counted in full for every process that maps them (PSS, in smaps, instead divides shared pages evenly among the sharers);
RssAnon: resident anonymous mappings;
RssFile: resident file-backed mappings.
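The Vm*/Rss* lines come from task_mem() in fs/proc/task_mmu.c, which reads the per-mm counters; a simplified sketch (peak/high-water tracking omitted; SEQ_PUT_DEC converts pages to kB when printing):

void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long anon, file, shmem, total_rss;

	anon  = get_mm_counter(mm, MM_ANONPAGES);
	file  = get_mm_counter(mm, MM_FILEPAGES);
	shmem = get_mm_counter(mm, MM_SHMEMPAGES);
	total_rss = anon + file + shmem;	/* VmRSS = RssAnon + RssFile + RssShmem */

	SEQ_PUT_DEC("VmSize:\t", mm->total_vm);
	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
	seq_puts(m, " kB\n");
}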
Scheduling
stack: the task's kernel call stack, which helps to some extent with tracking down hangs and deadlocks (the user-space stack is not shown here).
sched: per-task scheduling statistics and priority:
[root@a82dc075718f self]# cat sched
bash (3996, #threads: 1)
-------------------------------------------------------------------
se.exec_start : 15948317.402577
se.vruntime : 919.743657
se.sum_exec_runtime : 248.854012
se.nr_migrations : 301
nr_switches : 771
nr_voluntary_switches : 724
nr_involuntary_switches : 47
se.load.weight : 1048576
se.avg.load_sum : 313658
se.avg.util_sum : 311654
se.avg.load_avg : 6
se.avg.util_avg : 6
se.avg.last_update_time : 15948317402577
policy : 0
prio : 120
clock-delta : 204
The three values in schedstat are:
sum_exec_runtime: total time the task has spent executing on a CPU;
run_delay: total time spent waiting on a runqueue;
pcount: number of times the task has been scheduled onto a CPU.
The schedstat file reads the task's sched_info and requires CONFIG_SCHED_INFO to be enabled:
#ifdef CONFIG_SCHED_INFO
/*
* Provides /proc/PID/schedstat
*/
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
if (unlikely(!sched_info_on()))
seq_printf(m, "0 0 0\n");
else
seq_printf(m, "%llu %llu %lu\n",
(unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount);
return 0;
}
#endif
The sched_info structure:
struct sched_info {
#ifdef CONFIG_SCHED_INFO
/* Cumulative counters: */
/* # of times we have run on this CPU: */
unsigned long pcount;
/* Time spent waiting on a runqueue: */
unsigned long long run_delay;
/* Timestamps: */
/* When did we last run on a CPU? */
unsigned long long last_arrival;
/* When were we last queued to run? */
unsigned long long last_queued;
#endif /* CONFIG_SCHED_INFO */
};
pcount: number of times the task has been scheduled onto a CPU;
run_delay: total time spent waiting on a runqueue;
last_arrival: timestamp of when the task last ran on a CPU;
last_queued: timestamp of when the task was last queued to run.
Of these, schedstat only prints run_delay and pcount; the first value it shows, sum_exec_runtime, comes from the task's scheduling entity.
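Dividing run_delay by pcount gives the average runqueue wait per scheduling; a small user-space sketch (the two time values are in nanoseconds):

#include <stdio.h>

int main(void)
{
	unsigned long long exec_ns, delay_ns;
	unsigned long pcount;
	FILE *f = fopen("/proc/self/schedstat", "r");

	if (!f || fscanf(f, "%llu %llu %lu", &exec_ns, &delay_ns, &pcount) != 3)
		return 1;
	fclose(f);

	printf("ran %.3f ms, waited %.3f ms over %lu schedules (avg wait %.3f us)\n",
	       exec_ns / 1e6, delay_ns / 1e6, pcount,
	       pcount ? delay_ns / 1e3 / pcount : 0.0);
	return 0;
}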
stat: the output is a long single line mixing scheduling and memory information and is hard to read on its own; interpret the fields against do_task_stat in fs/proc/array.c (or proc(5)).
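For example, utime and stime are fields 14 and 15 (in clock ticks). Since the comm field can contain spaces, it is safest to parse from the last ')'; a sketch:

#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char line[4096], *p;
	unsigned long utime, stime;
	FILE *f = fopen("/proc/self/stat", "r");

	if (!f || !fgets(line, sizeof(line), f))
		return 1;
	fclose(f);

	/* skip "pid (comm) " -- comm may contain spaces and ')' */
	p = strrchr(line, ')');
	if (!p)
		return 1;
	/* after ')': state ppid pgrp session tty tpgid flags minflt cminflt majflt cmajflt utime stime ... */
	if (sscanf(p + 2, "%*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu",
		   &utime, &stime) != 2)
		return 1;

	printf("utime=%lu stime=%lu (ticks, %ld per second)\n",
	       utime, stime, sysconf(_SC_CLK_TCK));
	return 0;
}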
wchan: the kernel function in which the process is currently sleeping, e.g.:
cat /proc/2406/wchan
futex_wait_queue_me
#ifdef CONFIG_KALLSYMS
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
unsigned long wchan;
char symname[KSYM_NAME_LEN];
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto print0;
wchan = get_wchan(task);
if (wchan && !lookup_symbol_name(wchan, symname)) {
seq_puts(m, symname);
return 0;
}
print0:
seq_putc(m, '0');
return 0;
}
#endif
io
The io file reports the bytes read and written through the VFS, the number of read/write syscalls, and the bytes actually transferred to and from disk.
Test command:
dd if=/dev/zero of=/tmp/test.dat &
The io file of the dd process then shows:
rchar: 323934931 // total bytes read: sum of the length arguments to read()/pread(); counted at the VFS/page-cache level, not actual disk reads
wchar: 323929600 // total bytes written: sum of the length arguments to write()/pwrite()
syscr: 632687 // total number of read()/pread() calls
syscw: 632675 // total number of write()/pwrite() calls
read_bytes: 0 // bytes actually fetched from the storage layer (zero here because if=/dev/zero needs no disk reads)
write_bytes: 323932160 // bytes actually sent to the storage layer
cancelled_write_bytes: 0 // writeback avoided because the page cache was truncated (may be negative)
Per-task I/O accounting requires the kernel options CONFIG_TASK_IO_ACCOUNTING and CONFIG_TASK_XACCT.
#ifdef CONFIG_TASK_IO_ACCOUNTING
ONE("io", S_IRUSR, proc_tid_io_accounting),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
{
struct task_io_accounting acct = task->ioac;
unsigned long flags;
int result;
result = mutex_lock_killable(&task->signal->cred_guard_mutex);
if (result)
return result;
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
result = -EACCES;
goto out_unlock;
}
if (whole && lock_task_sighand(task, &flags)) {
struct task_struct *t = task;
task_io_accounting_add(&acct, &task->signal->ioac);
while_each_thread(task, t)
task_io_accounting_add(&acct, &t->ioac);
unlock_task_sighand(task, &flags);
}
seq_printf(m,
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
"syscw: %llu\n"
"read_bytes: %llu\n"
"write_bytes: %llu\n"
"cancelled_write_bytes: %llu\n",
(unsigned long long)acct.rchar,
(unsigned long long)acct.wchar,
(unsigned long long)acct.syscr,
(unsigned long long)acct.syscw,
(unsigned long long)acct.read_bytes,
(unsigned long long)acct.write_bytes,
(unsigned long long)acct.cancelled_write_bytes);
result = 0;
out_unlock:
mutex_unlock(&task->signal->cred_guard_mutex);
return result;
}
task_struct carries the I/O accounting counters:
struct task_struct {
......
struct task_io_accounting ioac;
}
struct task_io_accounting {
#ifdef CONFIG_TASK_XACCT
u64 rchar;
u64 wchar;
u64 syscr;
u64 syscw;
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
u64 read_bytes;
u64 write_bytes;
u64 cancelled_write_bytes;
#endif
};
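The rchar/wchar/syscr/syscw counters are bumped from the read/write syscall paths via small helpers in include/linux/task_io_accounting_ops.h (sketch below), while read_bytes/write_bytes are accounted when I/O is actually submitted toward the block layer:

/* sketch of include/linux/task_io_accounting_ops.h (under CONFIG_TASK_XACCT) */
static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
{
	tsk->ioac.rchar += amt;
}

static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
{
	tsk->ioac.wchar += amt;
}

static inline void inc_syscr(struct task_struct *tsk)
{
	tsk->ioac.syscr++;
}

static inline void inc_syscw(struct task_struct *tsk)
{
	tsk->ioac.syscw++;
}

Because these are updated on every successful read/write call, rchar/wchar also count data served from the page cache.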
The fd directory contains one symlink per open file descriptor:
[root@a82dc075718f fd]# ls -l
total 0
lrwx------ 1 root root 64 Mar 12 03:39 0 -> /dev/pts/0
lrwx------ 1 root root 64 Mar 12 03:39 1 -> /dev/pts/0
lrwx------ 1 root root 64 Mar 12 03:39 2 -> /dev/pts/0
lrwx------ 1 root root 64 Mar 12 07:40 255 -> /dev/pts/0
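A program can enumerate its own open descriptors the same way ls does, by reading the links; a sketch:

#include <stdio.h>
#include <dirent.h>
#include <unistd.h>
#include <limits.h>

int main(void)
{
	DIR *d = opendir("/proc/self/fd");
	struct dirent *e;
	char path[64], target[PATH_MAX];

	if (!d)
		return 1;
	while ((e = readdir(d)) != NULL) {
		if (e->d_name[0] == '.')
			continue;
		snprintf(path, sizeof(path), "/proc/self/fd/%s", e->d_name);
		ssize_t n = readlink(path, target, sizeof(target) - 1);
		if (n < 0)
			continue;
		target[n] = '\0';
		printf("%s -> %s\n", e->d_name, target);
	}
	closedir(d);
	return 0;
}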
limits shows the process's resource limits; they can be changed with ulimit, e.g. ulimit -c unlimited to remove the core-dump size limit. A setrlimit() sketch follows the table below.
cat limits
Limit Soft Limit Hard Limit Units
Max cpu time unlimited unlimited seconds
Max file size unlimited unlimited bytes
Max data size unlimited unlimited bytes
Max stack size 8388608 unlimited bytes
Max core file size 0 unlimited bytes
Max resident set unlimited unlimited bytes
Max processes unlimited unlimited processes
Max open files 1048576 1048576 files
Max locked memory 83968000 83968000 bytes
Max address space unlimited unlimited bytes
Max file locks unlimited unlimited locks
Max pending signals 7864 7864 signals
Max msgqueue size 819200 819200 bytes
Max nice priority 0 0
Max realtime priority 0 0
Max realtime timeout unlimited unlimited us
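The same limits can be read and changed programmatically with getrlimit()/setrlimit(); e.g. lifting the core-dump limit, the equivalent of ulimit -c unlimited, as a sketch:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;

	/* read the current core file size limit (the "Max core file size" row) */
	if (getrlimit(RLIMIT_CORE, &rl) != 0)
		return 1;
	printf("core limit: soft=%llu hard=%llu\n",
	       (unsigned long long)rl.rlim_cur, (unsigned long long)rl.rlim_max);

	/* raise the soft limit up to the hard limit, like "ulimit -c unlimited" */
	rl.rlim_cur = rl.rlim_max;
	if (setrlimit(RLIMIT_CORE, &rl) != 0)
		return 1;
	return 0;
}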