ext4文件系统挂载
大家可以使用以下命令挂载一个u盘到 /mnt目录下:
mount -t ext4 /dev/sda1 /mnt
其中mount这个应用程序最终通过mount系统调用进入内核,该系统调用的定义为:
/*
 * Entry point of the mount system call.
 *
 * The five arguments (device name, mount directory, filesystem type,
 * flags and option data) are forwarded unchanged to ksys_mount(), and
 * its return value becomes the result of the system call.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
return ksys_mount(dev_name, dir_name, type, flags, data);
}
ksys_mount函数参数含义: dev_name:设备名字 dir_name:挂载目录 type:文件系统类型 flags:挂载标志位 data:挂载选项
ksys_mount函数分析,文件在fs/namespace.c中:
/*
 * ksys_mount - obtain kernel-side copies of the mount arguments and mount
 *
 * @dev_name: user pointer to the device name
 * @dir_name: user pointer to the mount directory (handed to do_mount() as is)
 * @type:     user pointer to the filesystem type name
 * @flags:    mount flags, passed through to do_mount()
 * @data:     user pointer to the mount options
 *
 * Returns the result of do_mount(), or the error carried by the first
 * copy helper that failed.  Everything obtained from copy_mount_string()
 * and copy_mount_options() is released with kfree() before returning.
 */
int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
	       unsigned long flags, void __user *data)
{
	char *fs_name;
	char *dev;
	void *opts;
	int ret;

	fs_name = copy_mount_string(type);
	if (IS_ERR(fs_name))
		return PTR_ERR(fs_name);	/* nothing to release yet */

	dev = copy_mount_string(dev_name);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto free_type;
	}

	opts = copy_mount_options(data);
	if (IS_ERR(opts)) {
		ret = PTR_ERR(opts);
		goto free_dev;
	}

	ret = do_mount(dev, dir_name, fs_name, flags, opts);

	/* Release in reverse order of acquisition. */
	kfree(opts);
free_dev:
	kfree(dev);
free_type:
	kfree(fs_name);
	return ret;
}
ksys_mount一开始从用户态复制各种信息到内核,然后调用do_mount来执行真正挂载操作:
/*
 * do_mount - check a mount request, translate its flags and dispatch it
 *
 * @dev_name:  device name (kernel pointer)
 * @dir_name:  user pointer to the directory to mount on
 * @type_page: filesystem type name (kernel pointer)
 * @flags:     MS_* flags supplied by the caller
 * @data_page: option page, may be NULL
 *
 * Returns 0 or a negative error code.  -EINVAL is returned for MS_NOUSER,
 * -EPERM when may_mount() or (for SB_MANDLOCK) may_mandlock() refuses.
 */
long do_mount(const char *dev_name, const char __user *dir_name,
	      const char *type_page, unsigned long flags, void *data_page)
{
	/* Flags that express an explicit atime policy in the request. */
	const unsigned long atime_request = MS_NOATIME | MS_NODIRATIME |
					    MS_RELATIME | MS_STRICTATIME;
	struct path where;
	unsigned int mnt_flags = 0;
	unsigned int sb_flags;
	int err;

	/* Strip the magic value when it is present in the flags. */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Force a terminating zero into the last byte of the option page. */
	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	if (flags & MS_NOUSER)
		return -EINVAL;

	/* Resolve the mount directory; nothing to put back on failure. */
	err = user_path(dir_name, &where);
	if (err)
		return err;

	/* Security hook first, then the permission helpers, in that order. */
	err = security_sb_mount(dev_name, &where, type_page, flags, data_page);
	if (err)
		goto out_put;
	if (!may_mount()) {
		err = -EPERM;
		goto out_put;
	}
	if ((flags & SB_MANDLOCK) && !may_mandlock()) {
		err = -EPERM;
		goto out_put;
	}

	/* Translate the per-mount MS_* bits into MNT_* bits. */
	mnt_flags |= (flags & MS_NOATIME) ? MNT_NOATIME : MNT_RELATIME;
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	/* MS_STRICTATIME is applied after the atime bits above were set. */
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	/*
	 * A remount that requests no atime policy takes over the atime bits
	 * of the mount found at the resolved path.
	 */
	if ((flags & MS_REMOUNT) && !(flags & atime_request))
		mnt_flags = (mnt_flags & ~MNT_ATIME_MASK) |
			    (where.mnt->mnt_flags & MNT_ATIME_MASK);

	/* Only these bits are handed on as superblock flags. */
	sb_flags = flags & (SB_RDONLY | SB_SYNCHRONOUS | SB_MANDLOCK |
			    SB_DIRSYNC | SB_SILENT | SB_POSIXACL |
			    SB_LAZYTIME | SB_I_VERSION);

	/* The first matching request type wins. */
	if (flags & MS_REMOUNT) {
		err = do_remount(&where, flags, sb_flags, mnt_flags,
				 data_page);
	} else if (flags & MS_BIND) {
		err = do_loopback(&where, dev_name, flags & MS_REC);
	} else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) {
		err = do_change_type(&where, flags);
	} else if (flags & MS_MOVE) {
		err = do_move_mount(&where, dev_name);
	} else {
		err = do_new_mount(&where, type_page, sb_flags, mnt_flags,
				   dev_name, data_page);
	}

out_put:
	path_put(&where);
	return err;
}
do_mount首先做参数检查和安全相关的工作,并把用户传入的MS_*标志转换成挂载标志(mnt_flags)和超级块标志(sb_flags),然后根据flags参数来判断需要重新挂载、绑定挂载、修改挂载的传播类型(shared/private/slave/unbindable)、移动挂载路径、还是我们最常用的普通挂载,普通挂载的操作函数是do_new_mount:
/*
 * do_new_mount - create a mount of the named filesystem type and add it
 *
 * @path:      where to attach the new mount
 * @fstype:    filesystem type name; NULL yields -EINVAL
 * @sb_flags:  flags handed to vfs_kern_mount()
 * @mnt_flags: mount flags; may be adjusted by mount_too_revealing()
 * @name:      name handed to vfs_kern_mount()
 * @data:      mount data handed to vfs_kern_mount()
 *
 * Returns 0 on success, -ENODEV when get_fs_type() finds nothing, -EPERM
 * when mount_too_revealing() objects, or the error from vfs_kern_mount(),
 * fs_set_subtype() or do_add_mount().
 */
static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *fs;
	struct vfsmount *new_mnt;
	int ret;

	if (!fstype)
		return -EINVAL;

	fs = get_fs_type(fstype);
	if (!fs)
		return -ENODEV;

	new_mnt = vfs_kern_mount(fs, sb_flags, name, data);
	if (!IS_ERR(new_mnt)) {
		/* Fill in the subtype only if the type wants one and none is set. */
		if ((fs->fs_flags & FS_HAS_SUBTYPE) &&
		    !new_mnt->mnt_sb->s_subtype)
			new_mnt = fs_set_subtype(new_mnt, fstype);
	}

	/* The type reference is dropped whether or not the mount succeeded. */
	put_filesystem(fs);

	if (IS_ERR(new_mnt))
		return PTR_ERR(new_mnt);

	if (mount_too_revealing(new_mnt, &mnt_flags)) {
		ret = -EPERM;
		goto drop_mnt;
	}

	ret = do_add_mount(real_mount(new_mnt), path, mnt_flags);
	if (!ret)
		return 0;

drop_mnt:
	mntput(new_mnt);
	return ret;
}
do_new_mount挂载函数首先根据文件系统名字查找文件系统类型,然后使用vfs_kern_mount构建一个完整的mount结构体,并且在mount_too_revealing函数中确认mount结构体的命名空间的访问权限,最后通过do_add_mount把它添加到挂载树中。以下几个函数都是比较重要的:
- vfs_kern_mount
- mount_too_revealing
- do_add_mount
我们下面先分析第一个函数vfs_kern_mount:
/*
 * vfs_kern_mount - allocate a mount and attach a freshly mounted root to it
 *
 * @type:  filesystem type; NULL yields ERR_PTR(-ENODEV)
 * @flags: superblock flags; SB_KERNMOUNT marks the mount MNT_INTERNAL
 * @name:  name handed to alloc_vfsmnt() and mount_fs()
 * @data:  mount data handed to mount_fs()
 *
 * Returns the embedded vfsmount of the new mount, ERR_PTR(-ENOMEM) when
 * alloc_vfsmnt() fails, or the error returned by mount_fs().
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *m;
	struct dentry *fs_root;

	if (!type)
		return ERR_PTR(-ENODEV);

	m = alloc_vfsmnt(name);
	if (!m)
		return ERR_PTR(-ENOMEM);

	if (flags & SB_KERNMOUNT)
		m->mnt.mnt_flags = MNT_INTERNAL;

	fs_root = mount_fs(type, flags, name, data);
	if (!IS_ERR(fs_root)) {
		/* The new mount is its own parent and its own mountpoint. */
		m->mnt.mnt_root = fs_root;
		m->mnt.mnt_sb = fs_root->d_sb;
		m->mnt_mountpoint = fs_root;
		m->mnt_parent = m;

		/* Link the mount into the superblock's list of mounts. */
		lock_mount_hash();
		list_add_tail(&m->mnt_instance, &fs_root->d_sb->s_mounts);
		unlock_mount_hash();
		return &m->mnt;
	}

	/* mount_fs() failed: undo the allocation and pass its error on. */
	mnt_free_id(m);
	free_vfsmnt(m);
	return ERR_CAST(fs_root);
}
vfs_kern_mount首先会分配一个新的struct mount结构体,并初始化里面成员内容,然后通过mount_fs函数回调ext4这个文件系统的mount回调函数,这个回调函数在上一节说得很清楚,可以回去看看,最后配置struct mount结构体参数,并且把挂载描述符添加到超级块的挂载实例链表中后返回。mount_fs函数在文件fs/super.c中:
/*
 * mount_fs - invoke the filesystem type's ->mount() callback and finish
 * setting up the superblock behind the dentry it returns.
 *
 * @type:  filesystem type whose ->mount() callback is invoked
 * @flags: forwarded to ->mount() and to security_sb_kern_mount()
 * @name:  forwarded to ->mount()
 * @data:  mount data, may be NULL; forwarded to ->mount()
 *
 * Returns the dentry produced by ->mount() on success, otherwise an
 * ERR_PTR() carrying the error.
 */
struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
struct dentry *root;
struct super_block *sb;
char *secdata = NULL;
/* Preset so that a failed alloc_secdata() is reported as -ENOMEM. */
int error = -ENOMEM;
/*
 * Security data is prepared only when mount data was passed in and the
 * filesystem type does not set FS_BINARY_MOUNTDATA.
 */
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
secdata = alloc_secdata();
if (!secdata)
goto out;
error = security_sb_copy_data(data, secdata);
if (error)
goto out_free_secdata;
}
/* Hand over to the filesystem's own mount callback. */
root = type->mount(type, flags, name, data);
if (IS_ERR(root)) {
error = PTR_ERR(root);
goto out_free_secdata;
}
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
/*
 * Write barrier issued before SB_BORN is set.
 * NOTE(review): presumably orders the superblock setup ahead of the
 * flag for readers that test SB_BORN - confirm against the reader side.
 */
smp_wmb();
sb->s_flags |= SB_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
if (error)
goto out_sb;
/* A negative s_maxbytes only triggers a warning; the mount continues. */
WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
"negative value (%lld)\n", type->name, sb->s_maxbytes);
/*
 * NOTE(review): s_umount is presumably held for writing when ->mount()
 * returns; it is released here on success - confirm.
 */
up_write(&sb->s_umount);
free_secdata(secdata);
return root;
/* Error after ->mount() succeeded: drop the root and the superblock. */
out_sb:
dput(root);
deactivate_locked_super(sb);
out_free_secdata:
free_secdata(secdata);
out:
return ERR_PTR(error);
}
可以看到mount_fs函数首先根据是否传入了挂载数据、以及文件系统类型是否使用二进制挂载数据(FS_BINARY_MOUNTDATA)来判断要不要申请一块安全数据区存放数据,然后回调file_system_type的mount函数,这个函数在上一章说得很详细,感兴趣的可以去看一下,之后用smp_wmb()写屏障保证前面对超级块的写入先于SB_BORN标志的设置,最后是一些安全相关的超级块挂载检查(security_sb_kern_mount)。 我们下面分析第二个函数mount_too_revealing,在fs/namespace.c文件中:
/*
 * mount_too_revealing - decide whether a new mount must be refused
 *
 * @mnt:           the mount that is about to be added
 * @new_mnt_flags: proposed mount flags, forwarded to mnt_already_visible()
 *
 * Returns false when the current mount namespace belongs to init_user_ns
 * or the superblock lacks SB_I_USERNS_VISIBLE.  Returns true (after a
 * one-time warning) when the superblock does not carry both SB_I_NOEXEC
 * and SB_I_NODEV.  Otherwise the answer is the negation of
 * mnt_already_visible().
 */
static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
{
	struct mnt_namespace *cur_ns = current->nsproxy->mnt_ns;
	const unsigned long need = SB_I_NOEXEC | SB_I_NODEV;
	unsigned long have;

	if (cur_ns->user_ns == &init_user_ns)
		return false;

	have = mnt->mnt_sb->s_iflags;
	if (!(have & SB_I_USERNS_VISIBLE))
		return false;

	/* Both required bits present: defer to the visibility scan. */
	if ((have & need) == need)
		return !mnt_already_visible(cur_ns, mnt, new_mnt_flags);

	WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n", need);
	return true;
}
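mount_too_revealing用来判断新的挂载是否会向当前命名空间暴露过多内容,返回true表示不允许挂载(do_new_mount会返回-EPERM)。它首先判断当前挂载命名空间所属的用户命名空间,如果是初始用户命名空间(init_user_ns)则直接返回false;然后查看超级块的s_iflags,如果没有SB_I_USERNS_VISIBLE标志也直接返回false;接着检查超级块是否同时带有忽略设备文件(SB_I_NODEV)和忽略可执行文件(SB_I_NOEXEC)的标志,缺少任意一个则打印警告并返回true;最后使用mnt_already_visible判断同类型的文件系统在当前命名空间中是否已经完全可见: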
/*
 * mnt_already_visible - look for an existing, fully visible mount of the
 * same filesystem type in a mount namespace.
 *
 * @ns:            namespace whose mount list is scanned
 * @new:           the mount that is about to be added
 * @new_mnt_flags: in: proposed mount flags; out: on success the
 *                 MNT_LOCK_READONLY / MNT_LOCK_ATIME bits of the mount
 *                 that was found are OR-ed in
 *
 * Returns true as soon as one qualifying mount is found, false when the
 * whole list was scanned without a match.  The scan runs with
 * namespace_sem held for reading.
 */
static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
int *new_mnt_flags)
{
int new_flags = *new_mnt_flags;
struct mount *mnt;
bool visible = false;
down_read(&namespace_sem);
list_for_each_entry(mnt, &ns->list, mnt_list) {
struct mount *child;
int mnt_flags;
/* Only mounts of the same filesystem type are candidates. */
if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
continue;
/* Skip mounts whose root is not the root of their superblock. */
if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
continue;
/* Work on a local copy of the candidate's flags. */
mnt_flags = mnt->mnt.mnt_flags;
/* A read-only superblock counts as a locked read-only mount. */
if (sb_rdonly(mnt->mnt.mnt_sb))
mnt_flags |= MNT_LOCK_READONLY;
/* Skip if read-only is locked here but the new mount is writable. */
if ((mnt_flags & MNT_LOCK_READONLY) &&
!(new_flags & MNT_READONLY))
continue;
/* Skip if atime is locked here and the new atime bits differ. */
if ((mnt_flags & MNT_LOCK_ATIME) &&
((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
continue;
/*
 * Reject the candidate if any MNT_LOCKED child is mounted on a
 * directory that is_empty_dir_inode() does not report as empty.
 */
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode;
/* Children without MNT_LOCKED are ignored. */
if (!(child->mnt.mnt_flags & MNT_LOCKED))
continue;
if (!is_empty_dir_inode(inode))
goto next;
}
/* Carry the candidate's locked attributes over to the new mount. */
*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
MNT_LOCK_ATIME);
visible = true;
goto found;
/* Empty statement: target for moving on to the next candidate. */
next: ;
}
found:
up_read(&namespace_sem);
return visible;
}
mnt_already_visible主要遍历命名空间下的所有挂载结构体,寻找一个完全可见的mount结构体,其实只要找到一个就可以返回成功了。所以,它在遍历中首先跳过文件系统类型与新挂载不同的mount,再查看mount结构体的根目录是不是超级块的根目录,不是则跳过;然后验证挂载标志的权限(只读、atime相关的锁定标志),不满足的也跳过;再遍历其子挂载结构体,确认被锁定(MNT_LOCKED)的子挂载所覆盖的挂载点全部都是空目录,就可以确定这个mount是完全可见的。这样子,新的mount结构体就可以挂载到这个命名空间的挂载树上了。mnt_already_visible的主要作用是判断新挂载的目录下是否有一个已经被挂载的目录,有的话需要进行处理,将其隐藏不可见。下面的函数,也就是第三个函数do_add_mount,它就是把新的mount结构体挂载到这个命名空间的挂载树上的操作函数:
/*
 * do_add_mount - attach a new mount at the place described by @path
 *
 * @newmnt:    the mount to attach
 * @path:      where to attach it; lock_mount() may rewrite it
 * @mnt_flags: flags for the new mount; MNT_INTERNAL_FLAGS are masked out
 *
 * Returns the result of graft_tree() on success.  Fails with the error
 * from lock_mount(), with -EINVAL when the parent fails check_mnt() (and
 * is not an acceptable MNT_SHRINKABLE case) or when the new root is a
 * symlink, and with -EBUSY when the same superblock would be mounted on
 * the root of the mount that already sits at @path.
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *slot;
	struct mount *host;
	int ret;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	slot = lock_mount(path);
	if (IS_ERR(slot))
		return PTR_ERR(slot);

	/* Read path->mnt only after lock_mount(), which may have changed it. */
	host = real_mount(path->mnt);
	if (unlikely(!check_mnt(host))) {
		/* Tolerated only with MNT_SHRINKABLE and a parent that has a namespace. */
		if (!(mnt_flags & MNT_SHRINKABLE) || !host->mnt_ns) {
			ret = -EINVAL;
			goto out_unlock;
		}
	}

	/* Same superblock on the root of the mount at path: refuse. */
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
	    path->mnt->mnt_root == path->dentry) {
		ret = -EBUSY;
		goto out_unlock;
	}

	if (d_is_symlink(newmnt->mnt.mnt_root)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	newmnt->mnt.mnt_flags = mnt_flags;
	ret = graft_tree(newmnt, host, slot);

out_unlock:
	unlock_mount(slot);
	return ret;
}
do_add_mount首先通过lock_mount上锁,并且在lock_mount中寻找一个合适的挂载点,然后对于以下两种情况:1.在相同挂载点上挂载相同的文件系统;2.新文件系统的挂载实例的根inode是一个符号链接,这两种情况都返回错误。最后通过graft_tree把newmnt加入到全局文件系统树中,并且在graft_tree中处理了mount结构体、mount的父结构体、挂载点之间的关系,让它们互相指向。 接下来我们看看lock_mount函数:
/*
 * lock_mount - lock the location at which a new mount will be attached
 *
 * @path: requested location.  It is rewritten in place (both mnt and
 *        dentry) every time lookup_mnt() finds a mount already sitting
 *        on it, so on return it describes the location actually locked.
 *
 * On success returns the mountpoint from get_mountpoint() and leaves the
 * inode lock and the namespace lock taken here held.  On failure returns
 * an ERR_PTR() after releasing whatever was taken.
 */
static struct mountpoint *lock_mount(struct path *path)
{
struct vfsmount *mnt;
struct dentry *dentry = path->dentry;
retry:
inode_lock(dentry->d_inode);
/* Dentries rejected by cant_mount() fail with -ENOENT. */
if (unlikely(cant_mount(dentry))) {
inode_unlock(dentry->d_inode);
return ERR_PTR(-ENOENT);
}
namespace_lock();
mnt = lookup_mnt(path);
/* Nothing is mounted on path: this is the place to use. */
if (likely(!mnt)) {
struct mountpoint *mp = get_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
inode_unlock(dentry->d_inode);
return mp;
}
/* Success: both locks stay held. */
return mp;
}
/*
 * Something is mounted here: drop both locks, repoint path at the root
 * of that mount and start over.
 * NOTE(review): lookup_mnt() presumably returns a referenced vfsmount
 * that path takes over - confirm.
 */
namespace_unlock();
inode_unlock(path->dentry->d_inode);
path_put(path);
path->mnt = mnt;
dentry = path->dentry = dget(mnt->mnt_root);
goto retry;
}
lock_mount函数首先找到挂载目录的dentry并锁住它的inode,然后判断该目录是否能被挂载,如果不可以则直接返回一个错误。之后通过lookup_mnt函数查找是否已经有mount挂载在路径path上,如果找到了(mnt不为空),说明这个目录上已经挂载了文件系统,就释放锁,把path切换到这个mount的根目录dentry,重复上面的操作,直到lookup_mnt返回空为止,最后通过get_mountpoint得到一个挂载点,并且返回挂载点。下面看看get_mountpoint是怎么从dentry目录获取挂载点的:
/*
 * get_mountpoint - find or create the mountpoint record for a dentry
 *
 * @dentry: directory entry that is to serve as a mountpoint
 *
 * Returns the record found by lookup_mountpoint(), or a newly allocated
 * one that has been inserted into the hash.  Returns ERR_PTR(-ENOENT)
 * for a dentry that is already a mountpoint but unlinked,
 * ERR_PTR(-ENOMEM) when the allocation fails, or ERR_PTR() of the
 * d_set_mounted() error.
 */
static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
struct mountpoint *mp, *new = NULL;
int ret;
/* Already marked as a mountpoint: try to reuse the existing record. */
if (d_mountpoint(dentry)) {
if (d_unlinked(dentry))
return ERR_PTR(-ENOENT);
/* Also entered from below when d_set_mounted() reports -EBUSY. */
mountpoint:
read_seqlock_excl(&mount_lock);
mp = lookup_mountpoint(dentry);
read_sequnlock_excl(&mount_lock);
if (mp)
goto done;
}
/* Allocate at most once, even if the lookup above is retried. */
if (!new)
new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
if (!new)
return ERR_PTR(-ENOMEM);
ret = d_set_mounted(dentry);
/* -EBUSY: go back and look the record up instead. */
if (ret == -EBUSY)
goto mountpoint;
/* Any other error is returned to the caller as an ERR_PTR(). */
mp = ERR_PTR(ret);
if (ret)
goto done;
/* Initialise the new record and add it to the mountpoint hash. */
read_seqlock_excl(&mount_lock);
new->m_dentry = dentry;
new->m_count = 1;
hlist_add_head(&new->m_hash, mp_hash(dentry));
INIT_HLIST_HEAD(&new->m_list);
read_sequnlock_excl(&mount_lock);
mp = new;
/* Ownership moved to mp; clear new so the kfree() below frees nothing. */
new = NULL;
done:
kfree(new);
return mp;
}
get_mountpoint首先判断dentry是否已经是挂载点,如果是,则使用lookup_mountpoint函数从mountpoint hash表中查找挂载点,找到了直接返回找到的挂载点;找不到说明mountpoint hash表中没有,需要分配一个,然后通过d_set_mounted设置挂载点目录项的标志:如果返回-EBUSY,说明已经有其他人把它设置成了挂载点,回到前面重新查找;如果返回其他错误,则通过ERR_PTR这个宏把错误码转换成指针返回;设置成功后将新的挂载点加入到mountpoint hash表中,并且返回刚刚分配的挂载点。 挂载操作就到此结束了。
全部调用过程:
ksys_mount
  ↓
do_mount
  ├─ user_path
  └─ do_new_mount
       ├─ vfs_kern_mount
       │    ├─ alloc_vfsmnt
       │    └─ mount_fs
       ├─ mount_too_revealing
       │    └─ mnt_already_visible
       └─ do_add_mount
            ├─ lock_mount
            ├─ real_mount
            └─ graft_tree
|