1. 程式人生 > 其它 >1. Lab util: Unix utilities

1. Lab util: Unix utilities

第一個 lab,地址 https://pdos.csail.mit.edu/6.S081/2021/labs/util.html,lab 的主要目的是熟悉 xv6 以及其系統呼叫

1. Boot xv6

這裡主要是為了啟動 xv6,流程比較簡單

git clone git://g.csail.mit.edu/xv6-labs-2021
cd xv6-labs-2021
git checkout util
make qemu

make qemu之後,就進入到 xv6 中了,如下:

2. Sleep

2.1 要求

Implement the UNIX program sleep for xv6; your sleep should pause for a user-specified number of ticks. A tick is a notion of time defined by the xv6 kernel, namely the time between two interrupts from the timer chip. Your solution should be in the file user/sleep.c.

實現使用者程式 sleep:系統呼叫 sleep 在 xv6 核心中已經存在,這裡只需要編寫呼叫它的使用者態程式 user/sleep.c

2.2 實現

首先需要了解系統呼叫的流程,參考 user 目錄下的程式,以 cat.c為例

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

char buf[512];

/*
 * Copy everything readable from descriptor fd to standard output (fd 1),
 * using the file-level buffer `buf` as the transfer area.
 *
 * Returns normally once read() reports end of input (0).
 * Terminates the process with status 1 on a read error or a short write.
 */
void cat(int fd)
{
  for(;;){
    int got = read(fd, buf, sizeof(buf));

    // End of input: nothing more to copy.
    if(got == 0)
      return;

    if(got < 0){
      fprintf(2, "cat: read error\n");
      exit(1);
    }

    // A write that does not consume the whole chunk is treated as fatal.
    if(write(1, buf, got) != got){
      fprintf(2, "cat: write error\n");
      exit(1);
    }
  }
}

/*
 * cat entry point.
 *
 * With no file arguments, copies standard input to standard output.
 * Otherwise opens each argument in turn (mode 0) and copies it with cat();
 * the first file that cannot be opened ends the program with status 1.
 */
int main(int argc, char *argv[])
{
  int idx = 1;

  if(argc < 2){
    cat(0);
    exit(0);
  }

  while(idx < argc){
    char *path = argv[idx++];
    int fd = open(path, 0);

    if(fd < 0){
      fprintf(2, "cat: cannot open %s\n", path);
      exit(1);
    }
    cat(fd);
    close(fd);
  }
  exit(0);
}

make qemu之後,cat.c 會生成對應的 _cat 可執行檔案,專門用於 xv6,並且寫入到檔案系統中。

因此增加 sleep 指令可以簡單分為如下幾步:

  1. 編寫sleep.c,主要執行系統呼叫 sleep,其宣告位於 user.h
  2. 在 Makefile 中的 UPROGS中增加 $U/_sleep\,用於生成可執行檔案
  3. make qemu會將 step 2 的檔案寫入到檔案系統中

PS:這裡需要注意的是,sleep 的系統呼叫在 user.h 中只能找到宣告,其定義位於 usys.S,該檔案由 usys.pl 生成,裡面都是系統呼叫

sleep.c 程式碼如下:

#include "kernel/types.h"
#include "user/user.h"

/*
 * sleep entry point: pause for the number of ticks given in argv[1].
 *
 * Exits with status 1 (after printing a message on fd 2) when the argument
 * is missing or does not convert to a positive number; exits with status 0
 * after the sleep system call returns.
 */
int main(int argc, char *argv[])
{
    // argv[0] is the program name, so a tick count requires argc >= 2.
    // Without this check argv[1] would be a null pointer passed to atoi().
    if (argc < 2)
    {
        fprintf(2, "Usage: sleep [ticks]\n");
        exit(1);
    }

    int ticks = atoi(argv[1]);
    if (ticks <= 0)
    {
        fprintf(2, "sleep param invalid...\n");
        exit(1);
    }

    sleep(ticks);
    exit(0);
}

3. Pingpong

3.1 要求

Write a program that uses UNIX system calls to ''ping-pong'' a byte between two processes over a pair of pipes, one for each direction. The parent should send a byte to the child; the child should print "<pid>: received ping", where <pid> is its process ID, write the byte on the pipe to the parent, and exit; the parent should read the byte from the child, print "<pid>: received pong", and exit. Your solution should be in the file user/pingpong.c.

在控制檯中輸入 pingpong指令時,能夠按如下格式輸出,子程序先輸出 ${pid}: received ping,父程序再輸出 ${pid}: received pong

    $ make qemu
    ...
    init: starting sh
    $ pingpong
    4: received ping
    3: received pong
    $

3.2 實現

該 part 比較簡單,與增加 sleep流程基本一致。

#include "kernel/types.h"
#include "user/user.h"

/*
 * pingpong entry point: bounce one byte between a parent and a child
 * process over a pair of pipes, one per direction.
 *
 * Sequence:
 *   parent  --byte-->  child    child prints "<pid>: received ping"
 *   parent  <--byte--  child    parent prints "<pid>: received pong"
 *
 * Using two pipes means neither process can read back the byte it just
 * wrote, so the output order does not depend on scheduling.
 * Exits with status 1 if pipe, fork, read or write fails.
 */
int main(int argc, char *argv[])
{
    int to_child[2];   // parent writes [1], child reads [0]
    int to_parent[2];  // child writes [1], parent reads [0]
    char byte = 'x';

    if (pipe(to_child) == -1 || pipe(to_parent) == -1)
    {
        fprintf(2, "pipe failed ...\n");
        exit(1);
    }

    int pid = fork();
    if (pid < 0)
    {
        fprintf(2, "fork failed ... \n");
        exit(1);
    }

    // child process
    if (pid == 0)
    {
        // Close the ends this process does not use.
        close(to_child[1]);
        close(to_parent[0]);

        if (read(to_child[0], &byte, 1) != 1)
        {
            fprintf(2, "pingpong: child read failed\n");
            exit(1);
        }
        // Print before answering so "ping" always precedes "pong".
        fprintf(2, "%d: received ping\n", getpid());

        if (write(to_parent[1], &byte, 1) != 1)
        {
            fprintf(2, "pingpong: child write failed\n");
            exit(1);
        }
        close(to_child[0]);
        close(to_parent[1]);
        exit(0);
    }

    // parent process
    close(to_child[0]);
    close(to_parent[1]);

    if (write(to_child[1], &byte, 1) != 1)
    {
        fprintf(2, "pingpong: parent write failed\n");
        exit(1);
    }
    if (read(to_parent[0], &byte, 1) != 1)
    {
        fprintf(2, "pingpong: parent read failed\n");
        exit(1);
    }
    fprintf(2, "%d: received pong\n", getpid());

    close(to_child[1]);
    close(to_parent[0]);
    wait(0);
    exit(0);
}

4. Primes

4.1 要求

Write a concurrent version of prime sieve using pipes. This idea is due to Doug McIlroy, inventor of Unix pipes. The picture halfway down this page and the surrounding text explain how to do it. Your solution should be in the file user/primes.c.

輸出 2 ~ 35 的素數,按照流水線的流程,每次輸出完當前素數後,fork 一個程序輸出下一個素數,格式大致如下:

    $ make qemu
    ...
    init: starting sh
    $ primes
    prime 2
    prime 3
    prime 5
    prime 7
    prime 11
    prime 13
    prime 17
    prime 19
    prime 23
    prime 29
    prime 31
    $

要求父程序在所有子程序結束之後,才能退出。

4.2 實現

主要需要注意等到子程序都結束了才能關閉最初的父程序

#include "kernel/types.h"
#include "user/user.h"

/*
 * Run the sieve stages that read candidate numbers from descriptor `in`.
 *
 * Each stage takes the first number it receives as its prime, prints it,
 * then forwards every later number that is not a multiple of that prime
 * to a freshly forked next stage through a new pipe. The forked child
 * continues the loop as the next stage, so no recursion is needed.
 *
 * This function never returns: a stage exits with status 0 after its input
 * reaches end-of-file and its child has finished, or with status 1 if
 * pipe/fork fails.
 */
static void sieve(int in)
{
    for (;;)
    {
        int prime;
        if (read(in, &prime, sizeof(prime)) != (int)sizeof(prime))
        {
            // No numbers left: this is the end of the pipeline.
            close(in);
            exit(0);
        }
        fprintf(2, "prime %d\n", prime);

        int fd[2];
        if (pipe(fd) == -1)
        {
            fprintf(2, "pipe failed ...\n");
            exit(1);
        }

        int pid = fork();
        if (pid < 0)
        {
            fprintf(2, "fork failed ... \n");
            exit(1);
        }

        if (pid > 0)
        {
            // This process stays as the filter for `prime`.
            close(fd[0]);
            int n;
            while (read(in, &n, sizeof(n)) == (int)sizeof(n))
            {
                if (n % prime != 0 &&
                    write(fd[1], &n, sizeof(n)) != (int)sizeof(n))
                {
                    fprintf(2, "primes: write failed\n");
                    break;
                }
            }
            // Closing the write end gives the next stage its end-of-file.
            close(in);
            close(fd[1]);
            // Do not exit before every later stage has finished.
            wait(0);
            exit(0);
        }

        // Child: drop the inherited descriptors it does not need and
        // become the next stage, reading from the new pipe.
        close(in);
        close(fd[1]);
        in = fd[0];
    }
}

/*
 * primes entry point: print the primes in 2..35 using a pipeline of
 * processes connected by pipes.
 *
 * The initial process feeds 2..35 into the first stage, closes the pipe,
 * and waits for that stage before exiting; every stage in turn waits for
 * the one after it, so this process is the last to exit.
 */
int main(int argc, char *argv[])
{
    int fd[2];
    if (pipe(fd) == -1)
    {
        fprintf(2, "pipe failed ...\n");
        exit(1);
    }

    int pid = fork();
    if (pid < 0)
    {
        fprintf(2, "fork failed ... \n");
        exit(1);
    }
    if (pid == 0)
    {
        close(fd[1]);
        sieve(fd[0]);
    }

    close(fd[0]);
    for (int n = 2; n <= 35; n++)
    {
        if (write(fd[1], &n, sizeof(n)) != (int)sizeof(n))
        {
            fprintf(2, "primes: write failed\n");
            break;
        }
    }
    close(fd[1]);
    wait(0);

    exit(0);
}

5. find

5.1 要求

Write a simple version of the UNIX find program: find all the files in a directory tree with a specific name. Your solution should be in the file user/find.c.

實現在指定目錄中查詢指定名稱的檔案

5.2 實現

主要需要注意:

  1. 如何遍歷目錄
    1. open 路徑,獲取檔案的 fd
    2. 通過 fstat 校驗 fd 的型別
    3. 迴圈 read 獲得的 fd,強制型別轉換目錄項
  2. 需要跳過 . 和 ..,可以通過目錄項名稱 dirent.name 來判斷
#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"
#include "kernel/fs.h"

/*
 * Capacity assumed for the path buffer handed to find_file(). It must not
 * exceed the real size of that buffer; find() in this file passes 512 bytes.
 */
enum { FIND_PATH_MAX = 512 };

/*
 * Walk the directory search_path recursively and print (one per line, on
 * standard output) the path of every regular file whose name equals
 * search_file.
 *
 * buf          scratch buffer in which candidate paths are built; assumed
 *              to hold at least FIND_PATH_MAX bytes.
 * p            ignored on entry (it is recomputed below); kept so existing
 *              callers keep working.
 * search_path  directory to scan. May be the same pointer as buf, which is
 *              how the recursive call passes the sub-directory path.
 * search_file  file name to look for.
 *
 * Problems (cannot open/stat, path too long) are reported on fd 2 and the
 * affected directory or entry is skipped. A search_path that is not a
 * directory is skipped silently.
 */
void find_file(char *buf, char *p, char *search_path, char *search_file)
{
    int fd;
    struct stat st;
    struct dirent de;

    if ((fd = open(search_path, 0)) < 0)
    {
        fprintf(2, "find: cannot open %s\n", search_path);
        return;
    }

    if (fstat(fd, &st) < 0)
    {
        fprintf(2, "find: cannot stat %s\n", search_path);
        close(fd);
        return;
    }

    // Only a directory can be read as a sequence of struct dirent.
    if (st.type != T_DIR)
    {
        close(fd);
        return;
    }

    // Room needed: path + '/' + up to DIRSIZ name bytes + terminating NUL.
    if (strlen(search_path) + 1 + DIRSIZ + 1 > FIND_PATH_MAX)
    {
        fprintf(2, "find: path too long\n");
        close(fd);
        return;
    }

    // On recursion search_path already is buf; skip the self-copy.
    if (search_path != buf)
    {
        strcpy(buf, search_path);
    }
    p = buf + strlen(buf);
    *p++ = '/';

    while (read(fd, &de, sizeof(de)) == sizeof(de))
    {
        // Unused directory slot.
        if (de.inum == 0)
        {
            continue;
        }

        // Append the entry name. de.name has no NUL when the name is
        // exactly DIRSIZ bytes long, so terminate the copy explicitly and
        // compare against the copy rather than de.name.
        memmove(p, de.name, DIRSIZ);
        p[DIRSIZ] = 0;

        // Never descend into the directory itself or its parent.
        if (!strcmp(p, ".") || !strcmp(p, ".."))
        {
            continue;
        }

        if (stat(buf, &st) < 0)
        {
            fprintf(2, "find: cannot stat %s\n", buf);
            continue;
        }

        if (st.type == T_FILE && !strcmp(p, search_file))
        {
            printf("%s\n", buf);
        }
        else if (st.type == T_DIR)
        {
            // The recursive call extends buf past p; the next iteration
            // overwrites that tail starting again at p.
            find_file(buf, p, buf, search_file);
        }
    }
    close(fd);
}

/*
 * Search the tree rooted at search_path for files named search_file.
 *
 * Supplies find_file() with a 512-byte local scratch buffer; the start of
 * that buffer is passed for both of find_file()'s first two parameters.
 */
void find(char *search_path, char *search_file)
{
    char path_buf[512];

    find_file(path_buf, path_buf, search_path, search_file);
}

/*
 * find entry point: find <search path> <search file>.
 *
 * Exits with status 1 after printing a usage message on fd 2 when either
 * argument is missing; otherwise runs the search and exits with status 0.
 */
int main(int argc, char *argv[])
{
    // Both argv[1] and argv[2] are used below, so argc must be at least 3.
    if (argc < 3)
    {
        fprintf(2, "find param invalid, format find [search path] [search file] \n");
        exit(1);
    }
    find(argv[1], argv[2]);
    exit(0);
}

6. xargs

6.1 要求

Write a simple version of the UNIX xargs program: read lines from the standard input and run a command for each line, supplying the line as arguments to the command. Your solution should be in the file user/xargs.c.

實現 xargs 指令,這裡的區別在於,正常情況下,當輸入的結果為多行時,會合併到一起給後面的命令作為輸入,比如 :

$ echo "1\n2" | xargs echo line
line 1 2

而這裡要求的實現版可以將多行分為多次輸入,不要求做合併優化,如下:

$ echo "1\n2" | xargs -n 1 echo line
line 1
line 2

6.2 實現

xargs 通常形式為 xargs [command] [arg1] [arg2],思路如下:

  1. 首先,xargs 輸入的獲取是通過 0 檔案描述符,也就是標準輸入
  2. 獲取輸入後便要執行 xargs 後面的 command,因此要先準備引數列表 char* argv[]
    1. 需要把 xargs 後面的引數先加入到 char* argv[]
    2. 通過 read 獲取標準輸入的內容,由於有多行輸入,故需要類似 while(readline())的操作
    3. 讀取到一行資料後,根據空格分隔,依次將每個引數寫入到 char* argv[]
  3. 該行資料讀取完畢後,執行 exec介面
    1. 其介面格式為 exec(char* path, char** argv);,其中 argv[0] 需要為 xargs 後面的 command
    2. argv 陣列中,最後一個引數的下一個元素值需要為 0 ,用於 exec 判斷有多少個引數,如 argv[argc] = 0
  4. 接著讀取下一行,然後覆蓋原先讀取的行引數
#include "kernel/types.h"
#include "kernel/param.h"
#include "user/user.h"

/*
 * Read one line from standard input (fd 0), one byte at a time.
 *
 * A line ends at '\n' or at a '\0' byte; the terminator is not stored.
 * The final line is also returned when input ends without a terminator.
 * The buffer grows as needed, so line length is limited only by memory.
 *
 * Returns a NUL-terminated, malloc'ed string that the caller must free(),
 * or 0 when no more input is available (end of input or read error before
 * any byte of a new line was read) or when memory allocation fails.
 */
char *readline()
{
    int cap = 128;
    int len = 0;
    char *buf = malloc(cap);
    if (buf == 0)
        return 0;

    char c;
    // Anything other than exactly one byte (0 = end of input, -1 = error)
    // stops the loop.
    while (read(0, &c, 1) == 1)
    {
        if (c == '\n' || c == '\0')
        {
            buf[len] = '\0';
            return buf;
        }

        // Keep one spare byte so the terminator always fits.
        if (len + 1 >= cap)
        {
            int ncap = cap * 2;
            char *nbuf = malloc(ncap);
            if (nbuf == 0)
            {
                free(buf);
                return 0;
            }
            memmove(nbuf, buf, len);
            free(buf);
            buf = nbuf;
            cap = ncap;
        }
        buf[len++] = c;
    }

    // Input ended without a terminator: hand back what was collected.
    if (len > 0)
    {
        buf[len] = '\0';
        return buf;
    }
    free(buf);
    return 0;
}

/*
 * xargs entry point: xargs <command> [initial args...].
 *
 * For every line read from standard input, runs <command> once with the
 * initial arguments followed by the space-separated words of that line,
 * and waits for it to finish before reading the next line.
 *
 * Exits with -1 when no command is given, with 1 when the argument list
 * would not fit in MAXARG slots or fork fails, and with 0 after the last
 * input line has been processed.
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        printf("Usage: xargs [command]\n");
        exit(-1);
    }

    // nargv needs one slot for the terminating 0 that exec() looks for.
    int base = argc - 1;
    if (base > MAXARG - 1)
    {
        fprintf(2, "xargs: too many arguments\n");
        exit(1);
    }

    // Copy the fixed arguments, skipping argv[0] ("xargs") so that
    // nargv[0] is the command to execute.
    char *nargv[MAXARG];
    for (int i = 0; i < base; i++)
    {
        nargv[i] = argv[i + 1];
    }

    char *line;
    while ((line = readline()) != 0)
    {
        // Restart after the fixed arguments so words from the previous
        // line are not reused.
        int nargc = base;
        char *s = line;

        // Split the line in place: each word becomes a NUL-terminated
        // string inside `line`, so no per-word allocation is needed.
        while (*s != 0)
        {
            while (*s == ' ')
                s++;
            if (*s == 0)
                break;

            if (nargc >= MAXARG - 1)
            {
                fprintf(2, "xargs: too many arguments\n");
                free(line);
                exit(1);
            }
            nargv[nargc++] = s;

            while (*s != 0 && *s != ' ')
                s++;
            if (*s == ' ')
                *s++ = 0;
        }
        // exec() finds the end of the argument list by this 0 entry.
        nargv[nargc] = 0;

        int pid = fork();
        if (pid < 0)
        {
            fprintf(2, "xargs: fork failed\n");
            free(line);
            exit(1);
        }
        if (pid == 0)
        {
            exec(nargv[0], nargv);
            // exec() only returns on failure; the child must not fall
            // back into the read loop.
            fprintf(2, "xargs: exec %s failed\n", nargv[0]);
            exit(1);
        }

        wait(0);
        // nargv points into `line`, so release it only after the command
        // has run.
        free(line);
    }
    exit(0);
}