1. 程式人生 > >linux讀檔案過程-3.10.0-Start From read

linux讀檔案過程-3.10.0-Start From read

從系統呼叫read開始,

其系統呼叫實現如下,傳入的引數是開啟的檔案控制代碼,使用者區緩衝池,讀取長度:

SYSCALL_DEFINE3(read, unsignedint, fd, char __user *, buf, size_t, count)

{

struct fd f = fdget_pos(fd);

ssize_t ret = -EBADF;

if (f.file) {

                loff_t pos = file_pos_read(f.file);

                ret = vfs_read(f.file, buf, count, &pos);

if

 (ret >= 0)

                        file_pos_write(f.file, pos);

                fdput_pos(f);

        }

return ret;

}

fd是檔案描述符是一個整型,對應一個檔案,其實是一個索引,關聯到一個struct file。

因為入參中是檔案描述符,所以需要通過fdget_pos(fd)(->fdget->fget_light,從current->files中查詢)函式來獲取file結構體。

fdget_pos函式返回的是結構體struct fd,fd的定義如下,包含了file和flags:

/*
 * Result of a descriptor lookup, returned by value.
 */
struct fd {
        struct file *file;      /* NULL when the descriptor has no open file */
        unsigned int flags;     /* flags accompanying the lookup; meaning not shown here */
};


看到其中呼叫了vfs_read函式,引數是file、使用者層緩衝區、讀取長度和檔案偏移位置。

/*
 * Read up to count bytes from file into the user buffer buf, starting at
 * *pos.
 *
 * Returns the result of the file's read method (or of do_sync_read() when
 * the file only provides aio_read), or a negative error:
 *   -EBADF   the file was not opened with FMODE_READ
 *   -EINVAL  the file has no f_op, or neither read nor aio_read
 *   -EFAULT  access_ok() rejected the user buffer
 *   or the negative value returned by rw_verify_area().
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)

{

ssize_t ret;

/* The file must have been opened for reading. */
if (!(file->f_mode & FMODE_READ))

return -EBADF;

/* At least one of the two read methods must exist. */
if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))

return -EINVAL;

/* buf is the destination of the read, hence the VERIFY_WRITE check. */
if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))

return -EFAULT;

        ret = rw_verify_area(READ, file, pos, count);

if (ret >= 0) {

                /* A non-negative result from rw_verify_area() replaces count. */
                count = ret;

/* Prefer the synchronous read method; otherwise go through aio_read. */
if (file->f_op->read)

                        ret = file->f_op->read(file, buf, count, pos);

else

                        ret = do_sync_read(file, buf, count, pos);

/* Notification and byte accounting only when data was actually read. */
if (ret > 0) {

                        fsnotify_access(file);

                        add_rchar(current, ret);

                }

                /* Called for every read that passed rw_verify_area(), whatever its result. */
                inc_syscr(current);

        }

return ret;

}

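先是判斷檔案的模式是否可讀(否則返回-EBADF),以及其file_operations結構體中是否定義了read或者aio_read(兩者都沒有則返回-EINVAL),再檢查使用者緩衝區是否可訪問(否則返回-EFAULT)。如果讀到了資料(ret > 0),則通過add_rchar給程序結構的ioac成員更新io情況;inc_syscr則在rw_verify_area通過後,無論讀取結果如何都會呼叫。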

使用file的f_op函式集,ext4則是結構體ext4_file_operations,定義如下,所以file->f_op->read函式就是do_sync_read函式。

conststruct file_operations ext4_file_operations = {

        .llseek         = ext4_llseek,

        .read           = do_sync_read,

        .write          = do_sync_write,

        .aio_read       = generic_file_aio_read,

        .aio_write      = ext4_file_write,

        .unlocked_ioctl = ext4_ioctl,

#ifdef CONFIG_COMPAT

        .compat_ioctl   = ext4_compat_ioctl,

#endif

        .mmap           = ext4_file_mmap,

        .open           = ext4_file_open,

        .release        = ext4_release_file,

        .fsync          = ext4_sync_file,

        .splice_read    = generic_file_splice_read,

        .splice_write   = generic_file_splice_write,

        .fallocate      = ext4_fallocate,

};

如果是xfs檔案系統如下:

conststruct file_operations xfs_file_operations = {

        .llseek         = xfs_file_llseek,

        .read           = do_sync_read,

        .write          = do_sync_write,

        .aio_read       = xfs_file_aio_read,

        .aio_write      = xfs_file_aio_write,

        .splice_read    = xfs_file_splice_read,

        .splice_write   = xfs_file_splice_write,

        .unlocked_ioctl = xfs_file_ioctl,

#ifdef CONFIG_COMPAT

        .compat_ioctl   = xfs_file_compat_ioctl,

#endif

        .mmap           = xfs_file_mmap,

        .open           = xfs_file_open,

        .release        = xfs_file_release,

        .fsync          = xfs_file_fsync,

        .fallocate      = xfs_file_fallocate,

};

那麼,我們進入到了,函式do_sync_read如下:

/*
 * Synchronous read implemented on top of the file's aio_read method.
 *
 * Wraps the single user buffer in a one-element iovec, sets up an on-stack
 * kiocb starting at *ppos, and calls filp->f_op->aio_read(). If that
 * returns -EIOCBQUEUED, waits for the kiocb and uses the waited-for result.
 *
 * On return *ppos holds the kiocb's final position. Returns the value from
 * aio_read() or from wait_on_sync_kiocb().
 */
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
        struct iovec iov = { .iov_base = buf, .iov_len = len };
        struct kiocb kiocb;
        ssize_t ret;

        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
        kiocb.ki_left = len;
        kiocb.ki_nbytes = len;

        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
        /* The request was queued rather than completed: block until it finishes. */
        if (-EIOCBQUEUED == ret)
                ret = wait_on_sync_kiocb(&kiocb);

        /* Hand the position recorded in the kiocb back to the caller. */
        *ppos = kiocb.ki_pos;
        return ret;
}

    do_sync_read函式中會先初始化kiocb(kernel io control block),然後呼叫f_op->aio_read函式,在ext4中即generic_file_aio_read。

    在generic_file_aio_read函式中會呼叫generic_segment_checks,檢測要寫入資料的使用者緩衝區(iovec)是否有效,這種檢查在整個核心中都非常常見,如果不做檢測很容易導致指標異常而系統崩潰。

    然後判斷是否是直接IO,如果是則會呼叫retval = mapping->a_ops->direct_IO(READ, iocb, iov, pos, nr_segs);

其中的a_ops型別是struct address_space_operations,ext4的定義如下,其direct_IO成員是ext4_direct_IO。

staticconststruct address_space_operations ext4_aops = {

        .readpage               = ext4_readpage,

        .readpages              = ext4_readpages,

        .writepage              = ext4_writepage,

        .writepages             = ext4_writepages,

        .write_begin            = ext4_write_begin,

        .write_end              = ext4_write_end,

        .bmap                   = ext4_bmap,

        .invalidatepage_range   = ext4_invalidatepage,

        .releasepage            = ext4_releasepage,

        .direct_IO              = ext4_direct_IO,

        .migratepage            = buffer_migrate_page,

        .is_partially_uptodate  = block_is_partially_uptodate,

        .error_remove_page      = generic_error_remove_page,

};

     另外xfs的如下:

conststruct address_space_operations xfs_address_space_operations = {

        .readpage               = xfs_vm_readpage,

        .readpages              = xfs_vm_readpages,

        .writepage              = xfs_vm_writepage,

        .writepages             = xfs_vm_writepages,

        .set_page_dirty         = xfs_vm_set_page_dirty,

        .releasepage            = xfs_vm_releasepage,

        .invalidatepage_range   = xfs_vm_invalidatepage,

        .write_begin            = xfs_vm_write_begin,

        .write_end              = xfs_vm_write_end,

        .bmap                   = xfs_vm_bmap,

        .direct_IO              = xfs_vm_direct_IO,

        .migratepage            = buffer_migrate_page,

        .is_partially_uptodate  = block_is_partially_uptodate,

        .error_remove_page      = generic_error_remove_page,

};

如果不是直接IO,則呼叫函式do_generic_file_read,從磁碟讀取請求的頁並把它們複製到使用者態緩衝區。真正執行讀操作,是通過mapping->a_ops->readpage()來完成。

     address_space物件的readpage會負責啟用磁碟到頁之間的I/O資料傳輸。ext4的readpage函式是ext4_readpage,會呼叫函式mpage_readpage。如果塊在磁碟上是連續的,就用單個bio,如果不連續就用不同的bio描述符來讀。

完事之後,由file_read_actor函式負責把頁中的資料拷貝到使用者態緩衝區中。

     submit_bio是一個關鍵函式,負責根據傳遞的bio例項建立一個新請求,並使用make_request_fn將請求置於驅動程式的請求佇列上。

            之後就到塊層了,塊層會對這些請求進行合併、插入,從而提高效能。