XV6學習(15)Lab mmap: Mmap
阿新 • • 發佈:2021-02-10
程式碼在[Github](https://github.com/weijunji/xv6-6.S081/tree/mmap)上。
這一個實驗是要實現最基礎的`mmap`功能。mmap即記憶體對映檔案,將一個檔案直接對映到記憶體當中,之後對檔案的讀寫就可以直接通過對記憶體進行讀寫來進行,而對檔案的同步則由作業系統來負責完成。使用`mmap`可以避免對檔案大量`read`和`write`操作帶來的核心緩衝區和使用者緩衝區之間的頻繁的資料拷貝。在Kafka訊息佇列等軟體中藉助`mmap`來實現零拷貝(zero-copy)。
首先定義`vma`結構體用於儲存記憶體對映資訊,並在`proc`結構體中加入`struct vma *vma`指標:
```c
// Number of vma slots in the global vma_list pool shared by all processes.
#define NVMA 16
// User virtual address at which sys_mmap places a process's first mapping.
#define VMA_START (MAXVA / 2)
// One memory-mapped file region. Regions belonging to a process are chained
// through `next`, starting at proc->vma.
struct vma{
// First address of the region; the region covers [start, end).
uint64 start;
// One past the last mapped byte (exclusive bound).
uint64 end;
uint64 length; // 0 means vma not used
// File offset that corresponds to `start`.
uint64 off;
// PTE_* bits handed to mappages() when a page of the region is faulted in.
int permission;
// mmap flags as passed by the caller (MAP_PRIVATE is tested to skip write-back).
int flags;
// Backing file; sys_mmap and fork call filedup() on it for each vma.
struct file *file;
// Next region in the owning process's list, or 0 at the tail.
struct vma *next;
// Guards `length` while a slot is being claimed or released.
struct spinlock lock;
};
// Per-process state
struct proc {
...
struct vma *vma;
...
};
```
之後實現對`vma`分配的程式碼:
```c
struct vma vma_list[NVMA];
struct vma* vma_alloc(){
for(int i = 0; i < NVMA; i++){
acquire(&vma_list[i].lock);
if(vma_list[i].length == 0){
return &vma_list[i];
}else{
release(&vma_list[i].lock);
}
}
panic("no enough vma");
}
```
實現`mmap`系統呼叫,這個函式主要就是申請一個`vma`,之後查詢一塊空閒記憶體,填入相關資訊,將`vma`插入到程序的`vma`連結串列中去:
```c
uint64
sys_mmap(void)
{
uint64 addr;
int length, prot, flags, fd, offset;
if(argaddr(0, &addr) < 0 || argint(1, &length) < 0 || argint(2, &prot) < 0 || argint(3, &flags) < 0 || argint(4, &fd) < 0 || argint(5, &offset) < 0){
return -1;
}
if(addr != 0)
panic("mmap: addr not 0");
if(offset != 0)
panic("mmap: offset not 0");
struct proc *p = myproc();
struct file* f = p->ofile[fd];
int pte_flag = PTE_U;
if (prot & PROT_WRITE) {
if(!f->writable && !(flags & MAP_PRIVATE)) return -1; // map to a unwritable file with PROT_WRITE
pte_flag |= PTE_W;
}
if (prot & PROT_READ) {
if(!f->readable) return -1; // map to a unreadable file with PROT_READ
pte_flag |= PTE_R;
}
struct vma* v = vma_alloc();
v->permission = pte_flag;
v->length = length;
v->off = offset;
v->file = myproc()->ofile[fd];
v->flags = flags;
filedup(f);
struct vma* pv = p->vma;
if(pv == 0){
v->start = VMA_START;
v->end = v->start + length;
p->vma = v;
}else{
while(pv->next) pv = pv->next;
v->start = PGROUNDUP(pv->end);
v->end = v->start + length;
pv->next = v;
v->next = 0;
}
addr = v->start;
printf("mmap: [%p, %p)\n", addr, v->end);
release(&v->lock);
return addr;
}
```
接下來就可以在`usertrap`中對缺頁中斷進行處理:查詢程序的`vma`連結串列,判斷該地址是否為對映地址,如果不是就說明出錯,直接返回;如果在`vma`連結串列中,就可以申請並對映一個頁面,之後根據`vma`從對應的檔案中讀取資料:
```c
// Service a page fault that may fall inside one of the process's mmap regions.
//
// va     - faulting virtual address
// scause - trap cause; 13 and 15 are treated as read and write faults
//          (RISC-V load / store page fault codes)
//
// Returns 0 when a fresh page was mapped and filled from the file, or:
//   -1  va is not inside any region of this process
//   -2  read fault on a region without PTE_R
//   -3  write fault on a region without PTE_W
//   -4  out of physical memory
//   -5  mappages() failed
int
mmap_handler(uint64 va, int scause)
{
  struct proc *cur = myproc();
  struct vma *region;

  // Find the region whose [start, end) range contains the address.
  for(region = cur->vma; region != 0; region = region->next){
    if(region->start <= va && va < region->end)
      break;
  }
  if(region == 0)
    return -1; // not mmap addr

  int perm = region->permission;
  if(scause == 13 && (perm & PTE_R) == 0)
    return -2; // unreadable vma
  if(scause == 15 && (perm & PTE_W) == 0)
    return -3; // unwritable vma

  // Back the faulting page with a zeroed frame, then fill it from the file.
  uint64 page_va = PGROUNDDOWN(va);
  char *frame = kalloc();
  if(frame == 0)
    return -4; // kalloc failed
  memset(frame, 0, PGSIZE);

  if(mappages(cur->pagetable, page_va, PGSIZE, (uint64)frame, perm) != 0){
    kfree(frame);
    return -5; // map page failed
  }

  // Whatever readi() does not fill (e.g. past end of file) stays zero.
  struct inode *ip = region->file->ip;
  ilock(ip);
  readi(ip, 0, (uint64)frame, region->off + page_va - region->start, PGSIZE);
  iunlock(ip);
  return 0;
}
```
之後就是`munmap`的實現,同樣先從連結串列中找到對應的`vma`結構體,之後根據三種不同情況(頭部、尾部、整個)來寫回並釋放對應的頁面並更新`vma`資訊,如果整個區域都被釋放就將`vma`和檔案釋放。
```c
uint64
sys_munmap(void)
{
uint64 addr;
int length;
if(argaddr(0, &addr) < 0 || argint(1, &length) < 0){
return -1;
}
struct proc *p = myproc();
struct vma *v = p->vma;
struct vma *pre = 0;
while(v != 0){
if(addr >= v->start && addr < v->end) break; // found
pre = v;
v = v->next;
}
if(v == 0) return -1; // not mapped
printf("munmap: %p %d\n", addr, length);
if(addr != v->start && addr + length != v->end) panic("munmap middle of vma");
if(addr == v->start){
writeback(v, addr, length);
uvmunmap(p->pagetable, addr, length / PGSIZE, 1);
if(length == v->length){
// free all
fileclose(v->file);
if(pre == 0){
p->vma = v->next; // head
}else{
pre->next = v->next;
v->next = 0;
}
acquire(&v->lock);
v->length = 0;
release(&v->lock);
}else{
// free head
v->start -= length;
v->off += length;
v->length -= length;
}
}else{
// free tail
v->length -= length;
v->end -= length;
}
return 0;
}
```
寫回函式先判斷是否需要寫回,當需要寫回時就仿照`filewrite`的實現,將資料寫回到對應的檔案當中去,這裡的實現是直接寫回所有頁面,但實際可以根據`PTE_D`來判斷記憶體是否被寫入,如果沒有寫入就不用寫回:
```c
// Write n bytes of a mapped region, starting at user address addr, back to
// the region's file.
//
// v    - region the range belongs to; addr must lie inside it
// addr - page-aligned user virtual address of the first byte to write
// n    - number of bytes to write
//
// Does nothing for regions that are not writable or are MAP_PRIVATE.
// Every byte in the range is written, whether or not it was modified.
// The write is split into chunks so each one fits in a single log
// transaction; the chunk size follows the MAXOPBLOCKS formula below.
// Stops early if a chunk cannot be written in full.
void
writeback(struct vma* v, uint64 addr, int n)
{
  if(!(v->permission & PTE_W) || (v->flags & MAP_PRIVATE)) // no need to writeback
    return;
  if((addr % PGSIZE) != 0)
    panic("unmap: not aligned");
  printf("starting writeback: %p %d\n", addr, n);
  struct file* f = v->file;
  // File position of addr: the region's base offset plus how far addr lies
  // past the region's start.
  uint64 base = v->off + (addr - v->start);
  int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE;
  int i = 0;
  while(i < n){
    int n1 = n - i;
    if(n1 > max)
      n1 = max;
    begin_op();
    ilock(f->ip);
    printf("%p %d %d\n", addr + i, base + i, n1);
    // Second argument 1: the source is presumably taken as a user virtual
    // address - confirm against writei().
    int r = writei(f->ip, 1, addr + i, base + i, n1);
    iunlock(f->ip);
    end_op();
    // An error or short write would otherwise stall the loop or move it
    // backwards; give up on the rest of the range.
    if(r != n1)
      break;
    i += r;
  }
}
```
最後就是在`fork`當中複製`vma`到子程序,並在`exit`中將當前程序的`vma`連結串列釋放;在`exit`時還要對頁面進行寫回:
```c
// Excerpt of fork(): only the mmap-related addition is shown; the `...`
// lines stand for the parts of the function that are not reproduced here.
int
fork(void)
{
...
np->state = RUNNABLE;
// Give the child its own vma list that mirrors the parent's, one freshly
// allocated slot per parent region. Only the descriptors are copied; no
// pages are mapped in this excerpt.
np->vma = 0;
struct vma *pv = p->vma;
// Last vma appended to the child's list so far (0 while the list is empty).
struct vma *pre = 0;
while(pv){
// vma_alloc() hands the slot back with its lock held; it is released below
// once every field has been filled in.
struct vma *vma = vma_alloc();
vma->start = pv->start;
vma->end = pv->end;
vma->off = pv->off;
vma->length = pv->length;
vma->permission = pv->permission;
vma->flags = pv->flags;
vma->file = pv->file;
// Each vma calls filedup() on its file, matching the fileclose() done when
// a vma is torn down.
filedup(vma->file);
vma->next = 0;
// Append to the child's list, preserving the parent's order.
if(pre == 0){
np->vma = vma;
}else{
pre->next = vma;
}
pre = vma;
release(&vma->lock);
pv = pv->next;
}
...
}
void
exit(int status)
{
struct proc *p = myproc();
if(p == initproc)
panic("init exiting");
// munmap all mmap vma
struct vma* v = p->vma;
struct vma* pv;
while(v){
writeback(v, v->start, v->length);
uvmunmap(p->pagetable, v->start, PGROUNDUP(v->length) / PGSIZE, 1);
fileclose(v->file);
pv = v->next;
acquire(&v->lock);
v->next = 0;
v->length = 0;
release(&v->lock);
v = pv;
}
.