Linux open系統呼叫流程(2)
1. 書接上文,繼續分析do_filp_open函式,其傳入4個引數:
dfd:相對目錄
tmp:檔案路徑名(對應函式原型中的filename引數),例如要開啟/usr/src/kernels/linux-2.6.30
flags:開啟標誌
mode:開啟模式
/*
 * Note that while the flag value (low two bits) for sys_open means:
 *	00 - read-only
 *	01 - write-only
 *	10 - read-write
 *	11 - special
 * it is changed into
 *	00 - no permissions needed
 *	01 - read-permission
 *	10 - write-permission
 *	11 - read-write
 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 * used by symlinks.
 *
 * do_filp_open() looks the path up with open_namei() and, when that
 * succeeds, turns the resulting nameidata into a struct file through
 * nameidata_to_filp(). On failure the error code is returned wrapped
 * in ERR_PTR().
 */
static struct file *do_filp_open(int dfd, const char *filename, int flags,
				 int mode)
{
	/* Lookup result (mount point and dentry) lands in this structure. */
	struct nameidata nd;
	int acc_flags = flags;
	int err;

	/* Shift the access-mode bits into the internal encoding. */
	if ((acc_flags + 1) & O_ACCMODE)
		acc_flags++;

	/* Resolve the path; nd is filled in by open_namei(). */
	err = open_namei(dfd, filename, acc_flags, mode, &nd);
	if (err)
		return ERR_PTR(err);

	/* Convert the nameidata into the struct file handed to the caller. */
	return nameidata_to_filp(&nd, flags);
}
初看起來,寥寥幾行程式碼,貌似簡單。其實不然,一會就知道了。此函式呼叫了open_namei和nameidata_to_filp. 後一個函式通過名字就可以猜出來,是將nameidata結構轉化為filp,也就是利用nd結構賦值給檔案指標file,然後返回這個檔案指標。而open_namei肯定是填充nd結構體,具體功能可表述為: 根據上級目錄項物件,查詢下一級的目錄項物件,如果在目錄項快取找到下一級的目錄項物件,則直接返回,並填充nd的掛載點物件和目錄項物件。否則,構建一個子目錄項物件,並利用iget函式分配一個新的inode結構,將子目錄項物件和inode結構相關聯。這樣,一直迴圈到最後一個分量。最後返回的是最後一個分量的目錄項物件和掛載點物件。可以看到,在這兩個函式中,都利用了nameidata結構,具體看一下這個神奇的結構:
/* Lookup state shared by the path-walking routines shown in this article. */
struct nameidata {
	struct dentry *dentry;	/* current dentry */
	struct vfsmount *mnt;	/* mounted filesystem object */
	struct qstr last;	/* last component of the path name */
	unsigned int flags;	/* lookup flags */
	int last_type;		/* type of the last path component */
	unsigned depth;		/* current symlink nesting depth */
	char *saved_names[MAX_NESTED_LINKS + 1];	/* path names of the symlinks being followed */
	/* Intent data */
	union {
		struct open_intent open;	/* intent data for the file being opened */
	} intent;
};
/* Data describing an open request, carried inside struct nameidata. */
struct open_intent {
	int flags;		/* open flags */
	int create_mode;	/* mode to use if the file gets created */
	struct file *file;	/* the file object being opened */
};
open_intent中的file成員就是最後返回的檔案物件。
由於nameidata_to_filp比較簡單,先看一下:
/**
 * nameidata_to_filp - convert a nameidata to an open filp.
 * @nd: pointer to nameidata
 * @flags: open flags
 *
 * Returns the struct file carried in @nd's open intent. If its
 * f_path.dentry is still NULL the file is set up via __dentry_open()
 * using @nd's dentry and mount; otherwise @nd is released with
 * path_release().
 *
 * Note that this function destroys the original nameidata
 */
struct file *nameidata_to_filp(struct nameidata *nd, int flags)
{
	/* Pick up the file object from the open intent */
	struct file *file = nd->intent.open.file;

	/* Has the filesystem initialised the file for us? */
	if (file->f_path.dentry != NULL) {
		path_release(nd);
		return file;
	}

	/* Not yet: fill it in from the looked-up dentry and mount. */
	return __dentry_open(nd->dentry, nd->mnt, flags, file, NULL);
}
首先取得檔案物件指標,然後判斷檔案物件是否已經初始化,如果沒有初始化,就呼叫__dentry_open函式,對檔案物件進行初始化。
/*
 * Fill in struct file @f for the object named by @dentry on mount @mnt.
 *
 * @dentry: dentry of the object being opened
 * @mnt:    mount the dentry lives on
 * @flags:  open flags
 * @f:      file object to initialise
 * @open:   open callback to use; when NULL, f->f_op->open is used if
 *          f->f_op is set
 *
 * Returns @f on success. On failure returns ERR_PTR() of the error after
 * calling put_filp(f), dput(dentry) and mntput(mnt); if O_DIRECT was
 * requested but the mapping offers neither direct_IO nor get_xip_page,
 * fput(f) is called and ERR_PTR(-EINVAL) is returned.
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
					int flags, struct file *f,
					int (*open)(struct inode *, struct file *))
{
	struct inode *inode;
	int error;
	/* Record the open flags and derive the file mode from them */
	f->f_flags = flags;
	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
				FMODE_PREAD | FMODE_PWRITE;
	/* The inode behind the dentry */
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
		error = get_write_access(inode);
		if (error)
			goto cleanup_file;
	}
	/* Address space object */
	f->f_mapping = inode->i_mapping;
	/* Dentry object */
	f->f_path.dentry = dentry;
	/* Mount point object */
	f->f_path.mnt = mnt;
	/* File position starts at 0 */
	f->f_pos = 0;
	/* Take the file operations from inode->i_fop and install them as f_op */
	f->f_op = fops_get(inode->i_fop);
	file_move(f, &inode->i_sb->s_files);
	/* No explicit open callback given: fall back to f_op->open */
	if (!open && f->f_op)
		open = f->f_op->open;
	/*
	 * Call the open method only if there is one (the article's author
	 * notes that device files need this step, while other files may
	 * have no open method at all).
	 */
	if (open) {
		error = open(inode, f);
		if (error)
			goto cleanup_all;
	}
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
	/* Initialise the read-ahead state */
	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
	/* NB: we're sure to have correct a_ops only after f_op->open */
	if (f->f_flags & O_DIRECT) {
		if (!f->f_mapping->a_ops ||
		    ((!f->f_mapping->a_ops->direct_IO) &&
		    (!f->f_mapping->a_ops->get_xip_page))) {
			fput(f);
			f = ERR_PTR(-EINVAL);
		}
	}
	return f;
/* Undo everything done after get_write_access() succeeded */
cleanup_all:
	fops_put(f->f_op);
	if (f->f_mode & FMODE_WRITE)
		put_write_access(inode);
	file_kill(f);
	f->f_path.dentry = NULL;
	f->f_path.mnt = NULL;
/* Drop the file object plus the dentry and mount passed in */
cleanup_file:
	put_filp(f);
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}
首先,設定檔案開啟標誌f->f_flags. 然後初始化地址空間物件,目錄項物件,掛載點物件,檔案指標位置,檔案相關操作。需要說明兩點:
(1)地址空間物件和索引節點相關聯,在構建索引節點時已經賦值了。它涉及到具體的磁碟塊操作,在後面的章節將會解釋。
(2)f_op這個非常重要,也是在構建索引節點時,將具體檔案系統的檔案操作函式集的指標賦給索引節點的i_fop域。對於開啟檔案,目錄,符號連結,對應的操作函式集是不相同的。
接下來,第31行-38行(即程式碼中從`if (!open && f->f_op)`到`if (open) { ... }`這一段),如果是普通檔案,可能不需要開啟。如果是裝置檔案,就需要開啟操作。例如SCSI裝置的sg_open函式。
最後,對檔案預讀進行初始化。
在說完nameidata_to_filp函式之後,需要解釋open_namei函式:
/*
 *	open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *			  01 - read permission needed
 *			  10 - write permission needed
 *			  11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 *
 * Returns 0 on success with the lookup result left in @nd, or a negative
 * error code.
 * SMP-safe
 */
int open_namei(int dfd, const char *pathname, int flag,
		int mode, struct nameidata *nd)
{
	int acc_mode, error;
	/* path bundles a mount point object and a dentry object */
	struct path path;
	struct dentry *dir;
	/* Number of symlinks followed so far by the do_link loop below */
	int count = 0;
	acc_mode = ACC_MODE(flag);
	/* O_TRUNC implies we need access checks for write permissions */
	if (flag & O_TRUNC)
		acc_mode |= MAY_WRITE;
	/* Allow the LSM permission hook to distinguish append
	   access from general write access. */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;
	/*
	 * The simplest case - just a plain lookup.
	 * Nothing has to be created: look the path up with open intent and
	 * let the lookup fill in nd.
	 */
	if (!(flag & O_CREAT)) {
		error = path_lookup_open(dfd, pathname, lookup_flags(flag),
					 nd, flag);
		if (error)
			return error;
		goto ok;
	}
	/*
	 * Create - we need to know the parent.
	 * The lookup is therefore done with LOOKUP_PARENT set.
	 */
	error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
	if (error)
		return error;
	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
		goto exit;
	/*
	 * Per the article's author: on the create path nd holds the dentry
	 * and mount of the parent component, whereas on the plain-open path
	 * it holds those of the last component.
	 */
	dir = nd->dentry;
	nd->flags &= ~LOOKUP_PARENT;
	mutex_lock(&dir->d_inode->i_mutex);
	/* Look up the last component and record it in path */
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;
do_last:
	error = PTR_ERR(path.dentry);
	if (IS_ERR(path.dentry)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		goto exit;
	}
	if (IS_ERR(nd->intent.open.file)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		error = PTR_ERR(nd->intent.open.file);
		goto exit_dput;
	}
	/* Negative dentry, just create the file */
	if (!path.dentry->d_inode) {
		/*
		 * Create the file through open_namei_create().
		 * NOTE(review): dir->d_inode->i_mutex is still held at this
		 * point and is not released in this function on this branch;
		 * presumably open_namei_create() drops it - confirm.
		 */
		error = open_namei_create(nd, &path, flag, mode);
		if (error)
			goto exit;
		return 0;
	}
	/*
	 * It already exists.
	 */
	mutex_unlock(&dir->d_inode->i_mutex);
	audit_inode_update(path.dentry->d_inode);
	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;
	if (__follow_mount(&path)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
	}
	error = -ENOENT;
	if (!path.dentry->d_inode)
		goto exit_dput;
	/* An inode with a follow_link operation is handled as a symlink */
	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
		goto do_link;
	/* Hand path's dentry and mount over to nd */
	path_to_nameidata(&path, nd);
	error = -EISDIR;
	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
		goto exit;
ok:
	/* Final check through may_open() before reporting success */
	error = may_open(nd, acc_mode, flag);
	if (error)
		goto exit;
	return 0;
exit_dput:
	dput_path(&path, nd);
exit:
	if (!IS_ERR(nd->intent.open.file))
		release_open_intent(nd);
	path_release(nd);
	return error;
do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	nd->flags |= LOOKUP_PARENT;
	error = security_inode_follow_link(path.dentry, nd);
	if (error)
		goto exit_dput;
	error = __do_follow_link(&path, nd);
	if (error) {
		/* Does someone understand code flow here? Or it is only
		 * me so stupid? Anathema to whoever designed this non-sense
		 * with "intent.open".
		 */
		release_open_intent(nd);
		return error;
	}
	nd->flags &= ~LOOKUP_PARENT;
	if (nd->last_type == LAST_BIND)
		goto ok;
	error = -EISDIR;
	if (nd->last_type != LAST_NORM)
		goto exit;
	if (nd->last.name[nd->last.len]) {
		__putname(nd->last.name);
		goto exit;
	}
	/* Give up with -ELOOP once the symlink counter reaches 32 */
	error = -ELOOP;
	if (count++==32) {
		__putname(nd->last.name);
		goto exit;
	}
	/* Same steps as before do_last: lock the parent, look up the last component */
	dir = nd->dentry;
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;
	__putname(nd->last.name);
	goto do_last;
}
首先進行檔案開啟設定工作,第40行,如果是開啟操作,則呼叫path_lookup_open函式。第53行,如果檔案不存在,就建立一個檔案,呼叫path_lookup_create函式。在第88行,如果是建立檔案,需要建立磁碟上的索引節點,即呼叫open_namei_create函式。我們逐一解釋:
首先path_lookup_open函式:
/**
 * path_lookup_open - lookup a file path with open intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 *
 * Thin wrapper around __path_lookup_intent_open() that always passes a
 * create mode of 0; its return value is passed through unchanged.
 */
int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
		struct nameidata *nd, int open_flags)
{
	/* This wrapper supplies no create mode of its own. */
	const int create_mode = 0;

	return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
			open_flags, create_mode);
}
封裝了__path_lookup_intent_open函式。
path_lookup_create函式:
/**
 * path_lookup_create - lookup a file path with open + create intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 * @create_mode: create intent flags
 *
 * Same as path_lookup_open() except that LOOKUP_CREATE is OR-ed into
 * @lookup_flags and the caller's @create_mode is forwarded.
 */
static int path_lookup_create(int dfd, const char *name,
		unsigned int lookup_flags, struct nameidata *nd,
		int open_flags, int create_mode)
{
	/* Mark the lookup as one that may create the target. */
	unsigned int create_lookup_flags = lookup_flags | LOOKUP_CREATE;

	return __path_lookup_intent_open(dfd, name, create_lookup_flags, nd,
			open_flags, create_mode);
}
也封裝了__path_lookup_intent_open函式,只是增加了建立標誌LOOKUP_CREATE, 在create操作的lookup_flags設定了LOOKUP_PARENT,接下來,將看到這個標誌的作用。
繼續跟蹤__path_lookup_intent_open函式:
/*
 * Common helper for path_lookup_open() and path_lookup_create().
 *
 * Gets an empty struct file, stores it together with @open_flags and
 * @create_mode in nd->intent.open, then resolves @name through
 * do_path_lookup() with LOOKUP_OPEN added to @lookup_flags.
 *
 * Returns -ENFILE if no file object could be obtained, otherwise the
 * lookup result (or the error encoded in nd->intent.open.file when the
 * lookup itself reported success).
 */
static int __path_lookup_intent_open(int dfd, const char *name,
		unsigned int lookup_flags, struct nameidata *nd,
		int open_flags, int create_mode)
{
	struct file *empty;
	int rc;

	/* Get the struct file that will represent the opened file. */
	empty = get_empty_filp();
	if (!empty)
		return -ENFILE;

	/* Describe the open request in the intent data. */
	nd->intent.open.file = empty;
	nd->intent.open.flags = open_flags;
	nd->intent.open.create_mode = create_mode;

	/* Resolve the path with LOOKUP_OPEN set. */
	rc = do_path_lookup(dfd, name, lookup_flags | LOOKUP_OPEN, nd);

	if (!IS_ERR(nd->intent.open.file)) {
		/* File pointer still valid: drop the intent only on failure. */
		if (rc)
			release_open_intent(nd);
		return rc;
	}

	/* The file pointer now encodes an error. */
	if (rc)
		return rc;

	/* Lookup succeeded but the file is an error: report it, release nd. */
	rc = PTR_ERR(nd->intent.open.file);
	path_release(nd);
	return rc;
}
首先呼叫get_empty_filp函式分配一個空閒的檔案物件filp, 設定intent.open的相關域,包括“想要開啟的檔案”,開啟標誌和建立模式。最後,呼叫do_path_lookup對檔案路徑進行解析,並填充nd。
/*
 * Path lookup entry point: pick the starting dentry/mount for @name and
 * hand the actual walk to link_path_walk().
 *
 * The starting point is the root (or altroot) when @name begins with '/',
 * the current working directory when @dfd is AT_FDCWD, and otherwise the
 * directory referred to by the file descriptor @dfd.
 */
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int fastcall do_path_lookup(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	int retval = 0;
	int fput_needed;
	struct file *file;
	struct fs_struct *fs = current->fs;
	/* Default the last-component type to LAST_ROOT */
	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags;
	nd->depth = 0;
	/* Absolute path: start from the root directory */
	if (*name=='/') {
		read_lock(&fs->lock);
		if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
			/* Try the alternate root first: take its mount ... */
			nd->mnt = mntget(fs->altrootmnt);
			/* ... and its dentry as the starting point */
			nd->dentry = dget(fs->altroot);
			read_unlock(&fs->lock);
			if (__emul_lookup_dentry(name,nd))
				goto out; /* found in altroot */
			read_lock(&fs->lock);
		}
		/* Start at the root mount (mntget takes a reference) */
		nd->mnt = mntget(fs->rootmnt);
		/* Start at the root dentry (dget takes a reference) */
		nd->dentry = dget(fs->root);
		read_unlock(&fs->lock);
	/* Relative path based on the current working directory */
	} else if (dfd == AT_FDCWD) {
		read_lock(&fs->lock);
		/* Mount of the current working directory, from the fs_struct */
		nd->mnt = mntget(fs->pwdmnt);
		/* Dentry of the current working directory, from the fs_struct */
		nd->dentry = dget(fs->pwd);
		read_unlock(&fs->lock);
	} else {/* Relative path based on the directory open as dfd */
		struct dentry *dentry;
		/* Get the file object behind dfd */
		file = fget_light(dfd, &fput_needed);
		retval = -EBADF;
		if (!file)
			goto out_fail;
		/* Its dentry must be a directory ... */
		dentry = file->f_path.dentry;
		retval = -ENOTDIR;
		if (!S_ISDIR(dentry->d_inode->i_mode))
			goto fput_fail;
		/* ... and must pass the MAY_EXEC permission check */
		retval = file_permission(file, MAY_EXEC);
		if (retval)
			goto fput_fail;
		/* Start from the file's mount */
		nd->mnt = mntget(file->f_path.mnt);
		/* Start from the file's dentry */
		nd->dentry = dget(dentry);
		fput_light(file, fput_needed);
	}
	current->total_link_count = 0;
	/* Walk the path components */
	retval = link_path_walk(name, nd);
out:
	if (likely(retval == 0)) {
		if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
				nd->dentry->d_inode))
		audit_inode(name, nd->dentry->d_inode);
	}
out_fail:
	return retval;
/* Error after fget_light() succeeded: drop the file before returning */
fput_fail:
	fput_light(file, fput_needed);
	goto out_fail;
}
第11-14行,設定初始化nd->last_type, flags和depth. 其中depth表示符號連結的深度。由於符號連結可以連結自己,因此需要限制連結的深度。
第16行,如果第一個字元為/,表示從根目錄開始解析,設定nd->mnt為根掛載點物件,nd->dentry為根目錄項物件,然後增加引用計數。
第34行,如果是從當前目錄開始,將nd->mnt設定為當前目錄的掛載點物件,nd->dentry設定為當前目錄的目錄項物件。
第41行,否則,將nd->mnt和nd->dentry分別設定為f_path.mnt和f_path.dentry.
接下來,第63行,初始化符號連結總數,呼叫實際檔案系統的路徑分解函式link_path_walk.
/*
 * Walk @name starting from the position recorded in @nd.
 *
 * A copy of the starting nameidata is kept, with an extra reference on
 * its dentry and mount, so that the walk can be repeated from the same
 * place with LOOKUP_REVAL set if the first attempt returns -ESTALE.
 * Returns the result of the last __link_path_walk() call.
 */
int fastcall link_path_walk(const char *name, struct nameidata *nd)
{
	struct nameidata saved = *nd;
	int err;

	/* make sure the stuff we saved doesn't go away */
	dget(saved.dentry);
	mntget(saved.mnt);

	/* The real name resolution happens here. */
	err = __link_path_walk(name, nd);
	if (err != -ESTALE)
		goto drop_saved;

	/* Stale result: restart from the saved position, forcing revalidation. */
	*nd = saved;
	dget(nd->dentry);
	mntget(nd->mnt);
	nd->flags |= LOOKUP_REVAL;
	err = __link_path_walk(name, nd);

drop_saved:
	/* Drop the references taken on the saved copy. */
	dput(saved.dentry);
	mntput(saved.mnt);
	return err;
}
首先,備份掛載點物件和目錄項物件,然後呼叫__link_path_walk解析.
這個函式也比較複雜,在下一節中繼續分析!