在阅读该篇文章的前提,最好对mknod 有一定的了解,前置知识点: Linux中mknod命令实现原理以及源码分析
open 原理
我们经常在一个进程中使用open() 来获取一个文件描述符fd,然后通过该fd去进行一些write() 、read() 操作。 open() 的原理是通过给定的文件路径/dev/hello ,从而找到该文件路径所对应的inode 信息,最后生成一个struct file 结构体,该结构体在进程的打开文件列表中,返回的fd信息就是这个打开文件列表中的下标索引,所以说fd永远不会小于0。
open 源码分析
现在我们看一下open() 的实现源码,这里说明一下,我们将打开的是一个/dev/hello 的字符驱动设备文件,以这个设备文件为例进行源代码解读。
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
{
if (force_o_largefile())
flags |= O_LARGEFILE;
return do_sys_open(AT_FDCWD, filename, flags, mode);
}
从open() 的系统调用来看,最后由do_sys_open() 函数进行处理。
do_sys_open
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_how how = build_open_how(flags, mode);
return do_sys_openat2(dfd, filename, &how);
}
do_sys_open() 函数又由do_sys_openat2() 函数处理
do_sys_openat2
static long do_sys_openat2(int dfd, const char __user *filename,
struct open_how *how)
{
struct open_flags op;
int fd = build_open_flags(how, &op);
struct filename *tmp;
if (fd)
return fd;
tmp = getname(filename);
if (IS_ERR(tmp))
return PTR_ERR(tmp);
fd = get_unused_fd_flags(how->flags);
if (fd >= 0) {
struct file *f = do_filp_open(dfd, tmp, &op);
if (IS_ERR(f)) {
put_unused_fd(fd);
fd = PTR_ERR(f);
} else {
fsnotify_open(f);
fd_install(fd, f);
}
}
putname(tmp);
return fd;
}
do_sys_openat2() 函数为主要的处理函数,该函数的主要的处理步骤:
fd = get_unused_fd_flags(how->flags); 获取一个该进程未使用的fd struct file *f = do_filp_open(dfd, tmp, &op); 生成一个struct file fd_install(fd, f); 将fd 与struct file 进行绑定return fd; 最终将fd返回给调用者
这其中生成struct file 内容的函数为do_filp_open() ,因此我们需要继续对该函数分析。
do_filp_open
struct file *do_filp_open(int dfd, struct filename *pathname,
const struct open_flags *op)
{
struct nameidata nd;
int flags = op->lookup_flags;
struct file *filp;
set_nameidata(&nd, dfd, pathname, NULL);
filp = path_openat(&nd, op, flags | LOOKUP_RCU);
if (unlikely(filp == ERR_PTR(-ECHILD)))
filp = path_openat(&nd, op, flags);
if (unlikely(filp == ERR_PTR(-ESTALE)))
filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
restore_nameidata();
return filp;
}
由do_filp_open() 函数调用到path_openat() 函数。
path_openat
static struct file *path_openat(struct nameidata *nd,
const struct open_flags *op, unsigned flags)
{
struct file *file;
int error;
file = alloc_empty_file(op->open_flag, current_cred());
if (IS_ERR(file))
return file;
if (unlikely(file->f_flags & __O_TMPFILE)) {
error = do_tmpfile(nd, flags, op, file);
} else if (unlikely(file->f_flags & O_PATH)) {
error = do_o_path(nd, flags, file);
} else {
const char *s = path_init(nd, flags);
while (!(error = link_path_walk(s, nd)) &&
(s = open_last_lookups(nd, file, op)) != NULL)
;
if (!error)
error = do_open(nd, file, op);
terminate_walk(nd);
}
if (likely(!error)) {
if (likely(file->f_mode & FMODE_OPENED))
return file;
WARN_ON(1);
error = -EINVAL;
}
fput(file);
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;
else
error = -ESTALE;
}
return ERR_PTR(error);
}
path_openat() 该函数通过file = alloc_empty_file(op->open_flag, current_cred()); 为该进程生成了一个struct file ,最后由do_open() 函数完成剩下的打开动作。
do_open
static int do_open(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
struct user_namespace *mnt_userns;
int open_flag = op->open_flag;
bool do_truncate;
int acc_mode;
int error;
if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
error = complete_walk(nd);
if (error)
return error;
}
if (!(file->f_mode & FMODE_CREATED))
audit_inode(nd->name, nd->path.dentry, 0);
mnt_userns = mnt_user_ns(nd->path.mnt);
if (open_flag & O_CREAT) {
if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
return -EEXIST;
if (d_is_dir(nd->path.dentry))
return -EISDIR;
error = may_create_in_sticky(mnt_userns, nd,
d_backing_inode(nd->path.dentry));
if (unlikely(error))
return error;
}
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
return -ENOTDIR;
do_truncate = false;
acc_mode = op->acc_mode;
if (file->f_mode & FMODE_CREATED) {
open_flag &= ~O_TRUNC;
acc_mode = 0;
} else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
error = mnt_want_write(nd->path.mnt);
if (error)
return error;
do_truncate = true;
}
error = may_open(mnt_userns, &nd->path, acc_mode, open_flag);
if (!error && !(file->f_mode & FMODE_OPENED))
error = vfs_open(&nd->path, file);
if (!error)
error = ima_file_check(file, op->acc_mode);
if (!error && do_truncate)
error = handle_truncate(mnt_userns, file);
if (unlikely(error > 0)) {
WARN_ON(1);
error = -EINVAL;
}
if (do_truncate)
mnt_drop_write(nd->path.mnt);
return error;
}
do_open() 通过调用vfs_open() 函数完成。
vfs_open
int vfs_open(const struct path *path, struct file *file)
{
file->f_path = *path;
return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
}
再调用do_dentry_open() 函数。
do_dentry_open
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *))
{
static const struct file_operations empty_fops = {};
int error;
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
f->f_sb_err = file_sample_sb_err(f);
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;
f->f_op = &empty_fops;
return 0;
}
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = get_write_access(inode);
if (unlikely(error))
goto cleanup_file;
error = __mnt_want_write(f->f_path.mnt);
if (unlikely(error)) {
put_write_access(inode);
goto cleanup_file;
}
f->f_mode |= FMODE_WRITER;
}
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
f->f_mode |= FMODE_ATOMIC_POS;
f->f_op = fops_get(inode->i_fop);
if (WARN_ON(!f->f_op)) {
error = -ENODEV;
goto cleanup_all;
}
error = security_file_open(f);
if (error)
goto cleanup_all;
error = break_lease(locks_inode(f), f->f_flags);
if (error)
goto cleanup_all;
f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
if (!open)
open = f->f_op->open;
if (open) {
error = open(inode, f);
if (error)
goto cleanup_all;
}
f->f_mode |= FMODE_OPENED;
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(inode);
if ((f->f_mode & FMODE_READ) &&
likely(f->f_op->read || f->f_op->read_iter))
f->f_mode |= FMODE_CAN_READ;
if ((f->f_mode & FMODE_WRITE) &&
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
if (f->f_flags & O_DIRECT) {
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
return -EINVAL;
}
if (f->f_mode & FMODE_WRITE) {
smp_mb();
if (filemap_nr_thps(inode->i_mapping)) {
struct address_space *mapping = inode->i_mapping;
filemap_invalidate_lock(inode->i_mapping);
unmap_mapping_range(mapping, 0, 0, 0);
truncate_inode_pages(mapping, 0);
filemap_invalidate_unlock(inode->i_mapping);
}
}
return 0;
cleanup_all:
if (WARN_ON_ONCE(error > 0))
error = -EINVAL;
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITER) {
put_write_access(inode);
__mnt_drop_write(f->f_path.mnt);
}
cleanup_file:
path_put(&f->f_path);
f->f_path.mnt = NULL;
f->f_path.dentry = NULL;
f->f_inode = NULL;
return error;
}
do_dentry_open() 函数首先对于为进程生成的struct file 进行赋值,与该路径(/dev/hello)的inode进行绑定,除此之外,最终要的是对于struct file 的f_op 进行赋值,该f_op 的赋值会导致最终访问该文件时进行的write() 、read() 的行为,这个是重中之重。
f->f_op = fops_get(inode->i_fop);
struct file 中的f_op 的是直接赋值为inode->i_fop ,inode 则为/dev/hello 设备文件的inode ,此inode->i_fop 是def_chr_fops ,因此此时struct file 中的f_op 的值为def_chr_fops 。
接着往下看:
if (!open)
open = f->f_op->open;
if (open) {
error = open(inode, f);
if (error)
goto cleanup_all;
}
此时open = f->f_op->open ,也就是open = chrdev_open ,然后执行了chrdev_open() 函数。
chrdev_open
static int chrdev_open(struct inode *inode, struct file *filp)
{
const struct file_operations *fops;
struct cdev *p;
struct cdev *new = NULL;
int ret = 0;
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) {
struct kobject *kobj;
int idx;
spin_unlock(&cdev_lock);
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
} else if (!cdev_get(p))
ret = -ENXIO;
spin_unlock(&cdev_lock);
cdev_put(new);
if (ret)
return ret;
ret = -ENXIO;
fops = fops_get(p->ops);
if (!fops)
goto out_cdev_put;
replace_fops(filp, fops);
if (filp->f_op->open) {
ret = filp->f_op->open(inode, filp);
if (ret)
goto out_cdev_put;
}
return 0;
out_cdev_put:
cdev_put(p);
return ret;
}
由上篇mknod文章可以知道,inode->i_cdev保存的是对应驱动的设备号,通过
if (!p) {
struct kobject *kobj;
int idx;
spin_unlock(&cdev_lock);
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
if (!kobj)
return -ENXIO;
new = container_of(kobj, struct cdev, kobj);
spin_lock(&cdev_lock);
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
ret = -ENXIO;
}
根据设备号,可以找到对应的驱动设备,也就是inode->i_cdev ,这个驱动设备是由设备驱动初始化时生成的。
fops = fops_get(p->ops);
if (!fops)
goto out_cdev_put;
replace_fops(filp, fops);
紧接着将struct file 中的f_ops 替换为具体的设备的ops ,通过这种方式,struct file 的f_ops 值就为/dev/hello 设备文件所对应的设备驱动中设定的struct file_operations 值,等到后面的操作write() 、read() 就直接调用到设备驱动的回调函数了。
4. 总结
到这里对于Linux中open命令实现原理以及源码分析已经结束。
通过open() 系统调用打开/dev/hello 字符设备,最终将生成struct file ,同时将struct file 的f_ops 赋值为/dev/hello 字符设备所对应驱动的struct file_operations 。
谢谢大家浏览,本博客为博主原创,未经允许不可转载。
|