ChCore Lab3 使用者程序和異常處理實驗筆記

阿新 • • 發佈：2021-06-10

本文為上海交大 ipads 研究所陳海波老師等人所著的《現代作業系統：原理與實現》的課程實驗（LAB）的學習筆記的第三篇：使用者程序與異常處理。所有章節的筆記可在此處檢視：chcore | 康宇PL's Blog

實現使用者程序

程序結構

ChCore 中每個程序都對應一個 process 結構體。slot_table 是一個程序佔有的核心物件陣列和元資訊組成的結構體。thread_list 是連線該程序所有執行緒的連結串列。如何通過 list_head 得到對應的物件我們在 Lab 2 裡已經學習過了。

/* Per-process bookkeeping: the kernel objects a process owns and its threads. */
struct process {
	struct slot_table slot_table; // kernel objects owned by this process
	struct list_head thread_list; // threads that belong to this process
};

每個執行緒對應一個 thread 結構體，其中 node 是連線了同屬一個程序的所有執行緒的連結串列。thread_ctx 是執行緒的上下文（context），原始碼裡對他的註釋為“執行緒控制塊”，這倒和我的理解不大相同。vmspace 管理了執行緒的虛擬地址空間。process 指向了執行緒所屬的程序。

/* One thread; every thread belongs to exactly one process. */
struct thread {
	struct list_head node;	// link threads in a same process
	struct thread_ctx *thread_ctx;	// thread control block
	struct vmspace *vmspace;	// memory mapping
	struct process *process;	// process this thread belongs to
};

每個上下文中都包含了若干個暫存器，包含從 X0 ～ X30 的所有通用暫存器，以及 SP_EL0、ELR_EL1、SPSR_EL1、TPIDR_EL0 這四個特殊的暫存器。

/* Saved state of a thread: its register context plus the thread type. */
struct thread_ctx {
	/* Executing Context */
	arch_exec_cont_t ec;
	/* Thread Type */
	u32 type;
};

/*
 * Saved register file of a thread, indexed by enum reg_type.
 * size in registers.h (to be used in asm)
 */
typedef struct arch_exec_cont {
	u64 reg[REG_NUM];
} arch_exec_cont_t;

/*
 * Types of the registers.
 * Each value is an index into arch_exec_cont_t.reg; the trailing comment is
 * the byte offset of that slot (index * 8).
 */
enum reg_type {
	X0 = 0,			/* 0x00 */
	X1 = 1,			/* 0x08 */
	X2 = 2,			/* 0x10 */
     
    // ... the entries between X2 and X30 are elided in this listing ...
	X30 = 30,		/* 0xf0 */
	SP_EL0 = 31,		/* 0xf8 */
	ELR_EL1 = 32,		/* 0x100 NEXT PC */
	SPSR_EL1 = 33,		/* 0x108 */
	TPIDR_EL0 = 34		/* 0x110 */
};

Capability

Capability 可以理解為 Linux 下的檔案描述符。它把一個資源物件和訪問許可權封裝到了一起，並對外提供一個整形 cap 做訪問的控制代碼（控制代碼就是對資源物件的指標或者引用的一種抽象）。

ChCore 中每個 capability 都屬於一個程序。cap 的值實際上就是物件在所屬的 process 的 slot_table 中的下標。

來點示例程式碼

// Demo only: part of the error-handling code has been removed.
// Allocating a cap
int sys_create_pmo(u64 size, u64 type)
{
	/* Allocate the kernel object, then initialise it. */
	struct pmobject *new_pmo = obj_alloc(TYPE_PMO, sizeof(*new_pmo));

	pmo_init(new_pmo, type, size, 0);

	/* Attach the object to the current process; the result is its cap. */
	return cap_alloc(current_process, new_pmo, 0);
}

// Using a cap
// Demo only: the actual mapping work is elided.
// Returns 0 once the (elided) operation has completed.
int sys_map_pmo(u64 target_process_cap, u64 pmo_cap, u64 addr, u64 perm)
{
	struct pmobject *pmo;

	// Translate the cap into a pointer to the object.
	pmo = obj_get(current_process, pmo_cap, TYPE_PMO);

	// Operate on the object (omitted).
	// ......

	// Declare that we are done with the object; needed for concurrency safety.
	obj_put(pmo);

	// A non-void function must not fall off its end.
	return 0;
}

使用者程序的建立流程

process_create_root

main
├── uart_init
├── mm_init
├── exception_init
├── process_create_root
│   ├── ramdisk_read_file
│   ├── process_create
│   └── thread_create_main
└── eret_to_thread
    └── switch_context

ChCore 啟動後會依次初始化 uart 模組、記憶體管理模組、中斷模組，而後呼叫 process_create_root 建立一個根程序，當前 Lab 中也只有這一個程序。

process_create_root 具有以下行為

1）從磁碟中載入 ELF 檔案。

2）建立程序的 process 結構體並初始化，包括 slot_table 的初始化和分配一塊虛擬地址空間 vmspace。vmspace 的初始化包括申請一個空白的物理頁做 L0 頁表，將 user_current_heap 指定為一個常量。

3）為程序建立一個主執行緒。ChCore 中每個程序至少包含一個執行緒。

/* process_create_root: create the root process */
void process_create_root(char *bin_name)
{
	struct process *root_process;
	int thread_cap;
	struct thread *root_thread;
	char *binary = NULL;
	int ret;
	
    // 從磁碟中載入 ELF 檔案。
	ret = ramdisk_read_file(bin_name, &binary);
	BUG_ON(ret < 0);
	BUG_ON(binary == NULL);

    // 建立程序的 process 結構體並初始化
	root_process = process_create();

    // 為程序建立一個主執行緒
	thread_cap = thread_create_main(root_process, ROOT_THREAD_STACK_BASE,
					ROOT_THREAD_STACK_SIZE,
					ROOT_THREAD_PRIO, TYPE_ROOT,
					0, binary, bin_name);

    // get 是為了得到指標，get 完就 put 是為了讓引用計數器恢復原值
	root_thread = obj_get(root_process, thread_cap, TYPE_THREAD);
	/* Enqueue: put init thread into the ready queue */
	obj_put(root_thread);
	current_thread = root_thread;
}

thread_create_main

thread_create_main 負責給一個程序建立主執行緒

/*
 * thread_create_main: create the main thread of `process`.
 *
 * Allocates a stack PMO of `stack_size` bytes and maps it at `stack_base`,
 * loads the ELF image at `bin_start` into the process's vmspace, writes the
 * startup environment at the top of the stack, initialises the thread and
 * attaches it to the process.
 *
 * Returns the capability of the new thread inside `process`.
 *
 * NOTE(review): the results of obj_alloc, cap_alloc, vmspace_map_range and
 * thread_init are not checked in this listing.
 */
int thread_create_main(struct process *process, u64 stack_base,
		       u64 stack_size, u32 prio, u32 type, s32 aff,
		       const char *bin_start, char *bin_name)
{
	int ret, thread_cap, stack_pmo_cap;
	struct thread *thread;
	struct pmobject *stack_pmo;
	struct vmspace *init_vmspace;
	struct process_metadata meta;
	u64 stack;
	u64 pc;
    
	/* Look up the vmspace; the reference is dropped at once, the pointer is still used below. */
	init_vmspace = obj_get(process, VMSPACE_OBJ_ID, TYPE_VMSPACE);
	obj_put(init_vmspace);

	/* Allocate a stack and attach it to the process. */
	stack_pmo = obj_alloc(TYPE_PMO, sizeof(*stack_pmo));
	pmo_init(stack_pmo, PMO_DATA, stack_size, 0);
	stack_pmo_cap = cap_alloc(process, stack_pmo, 0);

	/* Map the stack into the virtual address space. */
	ret = vmspace_map_range(init_vmspace, stack_base, stack_size,
				VMR_READ | VMR_WRITE, stack_pmo);

	/* init thread */
	thread = obj_alloc(TYPE_THREAD, sizeof(*thread));

	/* The stack grows downwards, so start with the pointer at the high end. */
	stack = stack_base + stack_size;

	/* Parse and load the ELF file; the result is the entry point. */
	pc = load_binary(process, init_vmspace, bin_start, &meta);

	/*
	 * Store part of the environment on the stack. The first argument is the
	 * top of the stack PMO as seen through phys_to_virt, the second is the
	 * matching address in the thread's own address space.
	 */
	prepare_env((char *)phys_to_virt(stack_pmo->start) + stack_size,
		    stack, &meta, bin_name);
	stack -= ENV_SIZE_ON_STACK;

	/* Initialise the thread. */
	ret = thread_init(thread, process, stack, pc, prio, type, aff);

	/* Attach the thread to the process. */
	thread_cap = cap_alloc(process, thread, 0);

	/* L1 icache(instruction cache) & dcache(data cache) have no coherence */
	flush_idcache();

	return thread_cap;
}

thread_init 裡指定了執行緒的 vmspace 和 process，並將 context （執行緒上下文）初始化。

/*
 * thread_init: bind `thread` to `process` and its vmspace, create and fill
 * its context, and link it into the process's thread list.
 *
 * Returns 0 on success or -ENOMEM when no context could be created.
 */
static
int thread_init(struct thread *thread, struct process *process,
		u64 stack, u64 pc, u32 prio, u32 type, s32 aff)
{
	struct thread_ctx *ctx;

	thread->process = obj_get(process, PROCESS_OBJ_ID, TYPE_PROCESS);
	thread->vmspace = obj_get(process, VMSPACE_OBJ_ID, TYPE_VMSPACE);
	obj_put(thread->process);
	obj_put(thread->vmspace);

	/* Thread context is used as the kernel stack for that thread */
	ctx = create_thread_ctx();
	thread->thread_ctx = ctx;
	if (ctx == NULL) {
		return -ENOMEM;
	}

	init_thread_ctx(thread, stack, pc, prio, type, aff);

	/* add to process */
	list_add(&thread->node, &process->thread_list);
	return 0;
}

load_binary

load_binary：解析 ELF 檔案,並將其內容載入到新執行緒的使用者記憶體空間中。

大部分程式碼已經給出了，我們的任務是根據解析好的 ELF 檔案，把程式段從讀入的那塊記憶體區域拷貝到程式執行時應使用的那塊記憶體區域上，並且需要考慮記憶體對齊的情況。可講義裡並沒有提及 ELF 頭中各個域的含義，參考前人資料後得知維基百科上有：Executable and Linkable Format - Wikipedia

關鍵內容就是下面這個表了。

Field	Purpose
p_flags	Segment-dependent flags (position for 64-bit structure).
p_offset	Offset of the segment in the file image.
p_vaddr	Virtual address of the segment in memory.
p_paddr	On systems where physical address is relevant, reserved for segment's physical address.
p_filesz	Size in bytes of the segment in the file image. May be 0.
p_memsz	Size in bytes of the segment in memory. May be 0.
p_flags	Segment-dependent flags (position for 32-bit structure).
p_align	`0` and `1` specify no alignment. Otherwise should be a positive, integral power of 2, with p_vaddr equating p_offset modulus p_align.

我們要關注的有：

p_vaddr：程式段在虛擬記憶體中的起始地址

p_memsz：程式段在虛擬記憶體裡的長度

p_offset：程式段在檔案中相對於檔案開頭位置的偏移量

p_filesz：程式段在檔案中的長度

這裡解釋下為什麼程式段在記憶體裡和在檔案裡長度不一定相同。比如我定義了一個長度 100000 的未初始化的 int 陣列，如果我們在程式檔案裡真的寫 100000 個空資料就有點浪費了，所以此時我只在檔案裡記錄下 int 陣列的定義，而不會真的佔這麼大的空間。當讀到記憶體後再把這一部分資料展開。

實際分配記憶體時我們要將 p_memsz 轉換成頁對齊的 seg_map_sz。如下圖所示，p_memsz 開頭要擴充套件到前一個頁的交界處，結尾要擴充套件到後一個頁的交界處。按頁對齊的原因是為了保證 p_vaddr 在虛擬頁裡的偏移和物理頁裡的偏移相等，進而提高定址速度。

┌──────┬──────┬──────┬──────┬──────┬──────┐
│ PAGE │ PAGE │ PAGE │ PAGE │ PAGE │ PAGE │
└──────┴──────┴──────┴──────┴──────┴──────┘
       |   |-------p_memsz------|  |
       |   p_vaddr                 |
       |----------seg_map_sz-------|

用程式碼描述就是：

seg_sz = elf->p_headers->p_memsz;
p_vaddr = elf->p_headers->p_vaddr;
u64 vaddr_start = ROUND_DOWN(p_vaddr, PAGE_SIZE);
u64 vaddr_end = ROUND_UP(p_vaddr + seg_sz, PAGE_SIZE);
seg_map_sz = vaddr_end - vaddr_start;

在拷貝操作時則是按照 p_filesz 來複制的。

u64 start_offset = p_vaddr - vaddr_start;
char * pmo_start = (char *) phys_to_virt(pmo->start) + start_offset;
char * seg_start = bin + elf->p_headers[i].p_offset;
u64 copy_size = elf->p_headers[i].p_filesz;
for(u64 i = 0; i < copy_size; i++) {
    pmo_start[i] = seg_start[i];
}

總的來看 load_binary 的作用就是將 ELF 檔案的各個段拷貝到他們應該存在的記憶體地址處。

/* load binary into some process (process) */
static u64 load_binary(struct process *process,
		       struct vmspace *vmspace,
		       const char *bin, struct process_metadata *metadata)
{
	struct elf_file *elf;
	vmr_prop_t flags;
	int i, r;
	size_t seg_sz, seg_map_sz;
	u64 p_vaddr;

	int *pmo_cap;
	struct pmobject *pmo;
	u64 ret;

	elf = elf_parse_file(bin);
	pmo_cap = kmalloc(elf->header.e_phnum * sizeof(*pmo_cap));
	if (!pmo_cap) {
		r = -ENOMEM;
		goto out_fail;
	}

	/* load each segment in the elf binary */
	for (i = 0; i < elf->header.e_phnum; ++i) {
		pmo_cap[i] = -1;
		if (elf->p_headers[i].p_type == PT_LOAD) {
			seg_sz = elf->p_headers->p_memsz;
			p_vaddr = elf->p_headers->p_vaddr;
			u64 vaddr_start = ROUND_DOWN(p_vaddr, PAGE_SIZE);
			u64 vaddr_end = ROUND_UP(p_vaddr + seg_sz, PAGE_SIZE);
			seg_map_sz = vaddr_end - vaddr_start;

			pmo = obj_alloc(TYPE_PMO, sizeof(*pmo));
			if (!pmo) {
				r = -ENOMEM;
				goto out_free_cap;
			}
			pmo_init(pmo, PMO_DATA, seg_map_sz, 0);
			pmo_cap[i] = cap_alloc(process, pmo, 0);
			if (pmo_cap[i] < 0) {
				r = pmo_cap[i];
				goto out_free_obj;
			}

			u64 start_offset = p_vaddr - vaddr_start;
			char * pmo_start = (char *) phys_to_virt(pmo->start) + start_offset;
			char * seg_start = bin + elf->p_headers[i].p_offset;
			u64 copy_size = elf->p_headers[i].p_filesz;
			for(u64 i = 0; i < copy_size; i++) {
				pmo_start[i] = seg_start[i];
			}

			flags = PFLAGS2VMRFLAGS(elf->p_headers[i].p_flags);

			ret = vmspace_map_range(vmspace,
						ROUND_DOWN(p_vaddr, PAGE_SIZE),
						seg_map_sz, flags, pmo);

			BUG_ON(ret != 0);
		}
	}

	/* return binary metadata */
	if (metadata != NULL) {
		metadata->phdr_addr = elf->p_headers[0].p_vaddr +
		    elf->header.e_phoff;
		metadata->phentsize = elf->header.e_phentsize;
		metadata->phnum = elf->header.e_phnum;
		metadata->flags = elf->header.e_flags;
		metadata->entry = elf->header.e_entry;
	}

	kfree((void *)bin);

	/* PC: the entry point */
	return elf->header.e_entry;
}

context 的建立與初始化

建立 context 就是申請一塊記憶體。初始化 context 就是指定 SP_EL0、ELR_EL1、SPSR_EL1。

SP 作為棧頂指標暫存器，存的就是棧的虛擬地址。（Lab 2 學過了，虛擬記憶體機制啟動後都是自動用虛擬地址定址）

ELR 和 SPSR 成對存在，前者記錄的從當前特權級返回到之前特權級時的返回地址，這裡為程式入口函式的地址。後者記錄程式的各種狀態，這裡只需要更改下特權級標誌位，因為要跳到使用者態相應的低四位全設為 0 就行，有個現成的巨集就是 SPSR_EL1_USER 。設定好後用 eret 指令就會根據當前特權級的 ELR 和 SPSR 自動跳到另一個異常級別。

因為執行緒建立好，切換到它的時候是直接執行使用者態的程式碼。所以 SP 是使用者態的 EL0，而 ELR 和 SPSR 都是核心態的 EL1，為返回到使用者態做準備。

struct thread_ctx *create_thread_ctx(void)
{
	void *kernel_stack;
	kernel_stack = kzalloc(DEFAULT_KERNEL_STACK_SZ);
	return kernel_stack + DEFAULT_KERNEL_STACK_SZ -
	    sizeof(struct thread_ctx);
}

/*
 * init_thread_ctx: fill in the saved context of `thread` so that it starts
 * at `func` with `stack` as its SP_EL0, and record the thread type.
 * `prio` and `aff` are accepted but not used here.
 */
void init_thread_ctx(struct thread *thread, u64 stack, u64 func, u32 prio,
		     u32 type, s32 aff)
{
	struct thread_ctx *ctx = thread->thread_ctx;
	u64 *regs = ctx->ec.reg;

	/* Stack pointer, return address and saved program status. */
	regs[SP_EL0] = stack;
	regs[ELR_EL1] = func;
	regs[SPSR_EL1] = SPSR_EL1_USER;

	/* Thread type */
	ctx->type = type;
}

switch_context

switch_context 是為了完成上下文切換，為執行緒或者程序的切換做準備。

程式碼裡只做了兩件事：

1）切換到當前執行緒的地址空間上。實際上只是更新了 TTBR0_EL1

2）返回當前執行緒的上下文。

我們要做的也只是在已有程式碼上加一句返回上下文裡的暫存器。

/*
 * Switch vmspace and arch-related stuff
 * Return the context pointer which should be set to stack pointer register
 */
u64 switch_context(void)
{
	struct thread *target_thread;
	struct thread_ctx *target_ctx;

	target_thread = current_thread;
	BUG_ON(!target_thread);
	BUG_ON(!target_thread->thread_ctx);

	target_ctx = target_thread->thread_ctx;

	/* These 3 types of thread do not have vmspace */
	if (target_thread->thread_ctx->type != TYPE_IDLE &&
	    target_thread->thread_ctx->type != TYPE_KERNEL &&
	    target_thread->thread_ctx->type != TYPE_TESTS) {
		BUG_ON(!target_thread->vmspace);
		switch_thread_vmspace_to(target_thread);
	}
	/*
	 * Lab3: Your code here
	 * Return the correct value in order to make eret_to_thread work correctly
	 * in main.c
	 */
	return (u64) target_ctx->ec.reg;
}

在當前 Lab 中只有 eret_to_thread 呼叫了 switch_context，我們稍微研究下。

// 簡化版的 kernel 的 main 函式
void main(void *addr)
{
    uart_init();
    mm_init();
	exception_init();
	process_create_root(TEST);
	eret_to_thread(switch_context());
}

/*
 * void eret_to_thread(u64 sp)
 * Make the saved register array passed in x0 the current stack, then restore
 * it and return from the exception via exception_exit.
 */
BEGIN_FUNC(eret_to_thread)
	mov	sp, x0			// sp <- address of the saved context
	exception_exit
END_FUNC(eret_to_thread)

/*
 * exception_exit: restore a saved register context and return with eret.
 * On entry sp points at the saved register array: 8-byte slots in the order
 * of enum reg_type, with x0 at offset 0.
 */
.macro	exception_exit
	ldp	x11, x12, [sp, #16 * 16]	// slots 32, 33: saved ELR_EL1 and SPSR_EL1
	ldp	x30, x10, [sp, #16 * 15] 	// slots 30, 31: x30 and saved SP_EL0
	msr	sp_el0, x10
	msr	elr_el1, x11
	msr	spsr_el1, x12
	// Restore x0-x29 last; x10-x12 were used as scratch above.
	ldp	x0, x1, [sp, #16 * 0]
	ldp	x2, x3, [sp, #16 * 1]
	ldp	x4, x5, [sp, #16 * 2]
	ldp	x6, x7, [sp, #16 * 3]
	ldp	x8, x9, [sp, #16 * 4]
	ldp	x10, x11, [sp, #16 * 5]
	ldp	x12, x13, [sp, #16 * 6]
	ldp	x14, x15, [sp, #16 * 7]
	ldp	x16, x17, [sp, #16 * 8]
	ldp	x18, x19, [sp, #16 * 9]
	ldp	x20, x21, [sp, #16 * 10]
	ldp	x22, x23, [sp, #16 * 11]
	ldp	x24, x25, [sp, #16 * 12]
	ldp	x26, x27, [sp, #16 * 13]
	ldp	x28, x29, [sp, #16 * 14]
	add	sp, sp, #ARCH_EXEC_CONT_SIZE	// pop the context off the stack
	eret
.endm

可以發現 eret_to_thread 將 SP 設定為給定的 context 的暫存器域後，進而呼叫了 exception_exit。在這個過程中 context 裡儲存的資料都被讀取到了暫存器中，最後使用 eret 命令，根據當前特權級的 ELR 和 SPSR 跳轉到另一個執行緒中。

異常處理

AArch64 裡的異常分為同步異常和非同步異常。非同步異常包括 SError、FIQ、IRQ（硬體中斷即屬於此類），但我們都不會在 ChCore 中涉及到。同步異常是由指令執行直接引發的異常，例如系統呼叫（陷入）、缺頁、未定義指令等，也是我們要在 ChCore 中研究的重點。

異常向量表

AArch64 裡有三個異常向量基址暫存器 VBAR_EL1、VBAR_EL2、VBAR_EL3 。

異常向量表的本體放在 exception_table.S 中。名義上是要我們自己填寫，但看過配套視訊的話會發現課程裡已經講過了，這裡直接挪過來貼上就行。

/*
 * EL1 exception vector table: 16 entries in 4 groups of
 * sync / irq / fiq / error. The table is aligned to 2^11 bytes.
 * Group meaning per the AArch64 vector table layout (see the Arm
 * Architecture Reference Manual): current EL with SP_EL0, current EL with
 * SP_ELx, lower EL in AArch64, lower EL in AArch32.
 */
.align	11
EXPORT(el1_vector)
	/* el1t group */
	exception_entry sync_el1t
	exception_entry irq_el1t
	exception_entry fiq_el1t
	exception_entry error_el1t
	
	/* el1h group */
	exception_entry sync_el1h
	exception_entry irq_el1h
	exception_entry fiq_el1h
	exception_entry error_el1h
	
	/* el0_64 group */
	exception_entry sync_el0_64
	exception_entry irq_el0_64
	exception_entry fiq_el0_64
	exception_entry error_el0_64
	
	/* el0_32 group */
	exception_entry sync_el0_32
	exception_entry irq_el0_32
	exception_entry fiq_el0_32
	exception_entry error_el0_32

異常向量表的初始化

啟用向量表的程式碼是 set_exception_vector 這個函式，只是簡單的將上面 el1_vector 的地址放入 VBAR_EL1 暫存器中。

/* set_exception_vector: install el1_vector as the EL1 exception vector table. */
BEGIN_FUNC(set_exception_vector)
	adr	x0, el1_vector		// x0 <- address of the vector table
	msr	vbar_el1, x0		// VBAR_EL1 <- x0
	ret
END_FUNC(set_exception_vector)

ChCore 是在 main 函式中呼叫 exception_init 來間接完成這一過程的，我們要在 exception_init_per_cpu 中加一句呼叫語句來補全這一過程。

/* exception_init_per_cpu: install the exception vector table, then disable IRQs. */
void exception_init_per_cpu(void)
{
	/**
	 * Lab3: Your code here
	 * Setup the exception vector with the asm function written in exception.S
	 */
	set_exception_vector();
	disable_irq();
}

/* exception_init: exception setup entry point called from main; delegates to exception_init_per_cpu. */
void exception_init(void)
{
	exception_init_per_cpu();
}

異常處理流程

假設程式執行時發現了一條不在指令集裡的指令，此時會觸發一個未定義指令異常。主要操作有：

1）處理器將異常原因儲存到 ESR_EL1 中，將返回地址儲存到 ELR_EL1 中。

2）處理器從 VBAR_EL1 中獲取異常向量表基址，並根據當前各種狀態資訊確定要呼叫的異常處理程式的地址。

3）處理器自動將特權級切換到 EL1，在這期間自動完成了儲存 PSTATE、啟用 SP_EL1 作為棧指標等操作，完成了從使用者態到核心態的切換。

4）執行異常處理程式。

異常向量表裡每一條的具體含義不需要深究，目前只需要知道每一項裡的 exception_entry 是一個巨集，展開來就是直接跳到後面引數對應的函式處。而這些函式幾乎都指向了 handle_entry 這個函式。

引用課程講義裡的圖：

handle_entry 中主要幹了三件事：

1）儲存上下文

2）呼叫 C 語言中的異常處理器 handle_entry_c，實現具體的異常處理邏輯

3）恢復上下文

再次引用講義中的圖：

新增對未知指令異常的處理邏輯

按照實驗手冊說的輸出一條資訊，然後呼叫 sys_exit 就行。make grade 時我還挺疑惑 sys_exit 的引數為啥是 -12，看了看知乎網友羊男的筆記後才明白這一引數定義在 errno.h 中，含義正是未支援的指令。

/*
 * handle_entry_c: C-level exception handler.
 * Logs the exception, then dispatches on the exception class taken from
 * `esr`: an unknown-instruction exception terminates the current program
 * with -ESUPPORT, anything else is only logged.
 */
void handle_entry_c(int type, u64 esr, u64 address)
{
	/* ec: exception class */
	const u32 esr_ec = GET_ESR_EL1_EC(esr);

	kdebug("Interrupt type: %d, ESR: 0x%lx, Fault address: 0x%lx, EC 0b%b\n",
	       type, esr, address, esr_ec);

	/* Dispatch exception according to EC */
	if (esr_ec == ESR_EL1_EC_UNKNOWN) {
		kinfo("%s", UNKNOWN);
		/* Indicate this is a ESR_EL1_EC_UNKNOWN */
		sys_exit(-ESUPPORT);
		return;
	}

	kdebug("Unsupported Exception ESR %lx\n", esr);
}

系統呼叫和缺頁異常

從異常向量到系統呼叫表

系統呼叫異常是通過 SVC 指令觸發的，使用該指令後 ESR 將被設為一個特殊值，然後按照正常的異常處理流程定位到 sync_el0_64 這個函式中。

因為很多其他的異常都會走這個函式處理，所以此時要檢查下是否是通過 SVC 指令觸發的異常。是的話走一下 el0_syscall 函式，根據 x8 暫存器裡記錄的系統呼叫號跳轉到 syscall_table 中對應的函式裡。

/*
 * sync_el0_64: entry for synchronous exceptions in the el0_64 vector group.
 * System calls (SVC) branch to el0_syscall; everything else is handed to
 * handle_entry_c(SYNC_EL0_64, esr_el1, elr_el1).
 */
sync_el0_64:
	/* Since we cannot touch x0-x7, we need some extra work here */
	exception_enter
	mrs	x25, esr_el1			// x25 <- exception syndrome
	lsr	x24, x25, #ESR_EL1_EC_SHIFT	// x24 <- exception class field
	cmp	x24, #ESR_EL1_EC_SVC_64
	b.eq	el0_syscall			// SVC: this is a system call
	/* Not supported exception */
	mov	x0, SYNC_EL0_64 		// arg0: exception type
	mrs	x1, esr_el1			// arg1: syndrome
	mrs	x2, elr_el1			// arg2: return address
	bl	handle_entry_c
	exception_exit

/*
 * el0_syscall: dispatch a system call.
 * The system call number is taken from w8 and used as an index into
 * syscall_table; the handler's return value is written into the first slot
 * of the saved context (the one exception_exit reloads into x0).
 */
el0_syscall:
	/*
	 * Spill x0-x15 to a temporary frame and reload them straight away;
	 * the register values are unchanged by this sequence.
	 * NOTE(review): presumably a placeholder for code that would run
	 * between the save and the restore - confirm.
	 */
	sub	sp, sp, #16 * 8
	stp	x0, x1, [sp, #16 * 0]
	stp	x2, x3, [sp, #16 * 1]
	stp	x4, x5, [sp, #16 * 2]
	stp	x6, x7, [sp, #16 * 3]
	stp	x8, x9, [sp, #16 * 4]
	stp	x10, x11, [sp, #16 * 5]
	stp	x12, x13, [sp, #16 * 6]
	stp	x14, x15, [sp, #16 * 7]
	ldp	x0, x1, [sp, #16 * 0]
	ldp	x2, x3, [sp, #16 * 1]
	ldp	x4, x5, [sp, #16 * 2]
	ldp	x6, x7, [sp, #16 * 3]
	ldp	x8, x9, [sp, #16 * 4]
	ldp	x10, x11, [sp, #16 * 5]
	ldp	x12, x13, [sp, #16 * 6]
	ldp	x14, x15, [sp, #16 * 7]
	add	sp, sp, #16 * 8

	adr	x27, syscall_table		// syscall table in x27
	uxtw	x16, w8				// syscall number in x16
	ldr	x16, [x27, x16, lsl #3]		// find the syscall entry
	blr	x16

	/* Ret from syscall */
	// bl	disable_irq
	str	x0, [sp] /* set the return value of the syscall */
	exception_exit

系統呼叫表需要我們手動完善下幾個函式與系統呼叫號間的關係

/*
 * System call dispatch table, indexed by system call number.
 * Every slot first defaults to sys_debug; the designated entries that follow
 * override the slots of the implemented system calls.
 *
 * Lab3: Your code here
 * Update the syscall table as you like to redirect syscalls
 * to functions accordingly
 */
const void *syscall_table[NR_SYSCALL] = {
	[0 ... NR_SYSCALL - 1] = sys_debug,
	[SYS_putc] = sys_putc,
	[SYS_exit] = sys_exit,
	[SYS_create_pmo] = sys_create_pmo,
	[SYS_map_pmo] = sys_map_pmo,
	[SYS_handle_brk] = sys_handle_brk,
	/* lab3 syscalls finished */
};

使用者態使用系統呼叫

使用者態提供的系統呼叫比如 usys_putc 、usys_exit 等等都是對 syscall 函式的封裝。在 syscall 裡我們需要將系統呼叫號存入 x8 暫存器，並將函式的 0 ~ 7 號引數依次存入 x0 ~ x7 的暫存器中，最後使用 SVC 指令觸發異常進入系統呼叫的處理流程。

/* usys_putc: user-side wrapper that issues the SYS_putc system call with `ch` as its only argument. */
void usys_putc(char ch)
{
	syscall(SYS_putc, ch, 0, 0, 0, 0, 0, 0, 0, 0);
}

/*
 * syscall: issue a system call from user space.
 *
 * sys_no:      system call number, passed to the kernel in x8
 * arg0..arg7:  arguments, passed in x0..x7
 * arg8:        accepted for interface compatibility; there is no register
 *              left to carry it, so it is not passed on
 *
 * Returns the value the kernel leaves in x0.
 */
u64 syscall(u64 sys_no, u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
	    u64 arg5, u64 arg6, u64 arg7, u64 arg8)
{

	u64 ret = 0;

	(void)arg8;

	/*
	 * Lab3: Your code here
	 * Use inline assembly to store arguments into x0 to x7, store syscall number to x8,
	 * And finally use svc to execute the system call. After syscall returned, don't forget
	 * to move return value from x0 to the ret variable of this function
	 */
	asm volatile ("mov x8, %1\n"
		      "mov x0, %2\n"
		      "mov x1, %3\n"
		      "mov x2, %4\n"
		      "mov x3, %5\n"
		      "mov x4, %6\n"
		      "mov x5, %7\n"
		      "mov x6, %8\n"
		      "mov x7, %9\n"	/* arg7 was previously never loaded */
		      "svc #0\n"
		      "mov %0, x0\n"
		      : "=r" (ret)
		      : "r"(sys_no), "r"(arg0), "r"(arg1), "r"(arg2),
			"r"(arg3), "r"(arg4), "r"(arg5), "r"(arg6),
			"r"(arg7)
		      /*
		       * x0-x8 are overwritten above. "memory" because the
		       * kernel may read or write memory reachable through
		       * pointer arguments.
		       */
		      : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8",
			"memory"
	);
	return ret;
}

sys_handle_brk 的實現

為了實現這個函式我們得複習下 vmspace 的結構。

/* Address space of a process: its regions, root page table and heap. */
struct vmspace {
	/* list of vmregion */
	struct list_head vmr_list;
	/* root page table */
	vaddr_t *pgtbl;
	/* region describing the heap; set by sys_handle_brk when the heap is initialised */
	struct vmregion *heap_vmr;
	/* start address of the heap */
	vaddr_t user_current_heap;
};

/* One contiguous range of virtual addresses together with the object backing it. */
struct vmregion {
	struct list_head node;	// vmr_list
	vaddr_t start;		// first virtual address of the region
	size_t size;		// length of the region in bytes
	vmr_prop_t perm;	// permissions used when the region is mapped
	struct pmobject *pmo;	// physical memory object behind the region
};

每個程序都有一個 vmspace 記錄了它的地址空間、頁表、堆。

vmspace->user_current_heap 記錄了堆的起始地址。

vmspace->heap_vmr->size 記錄了堆的長度。

vmspace->heap_vmr->pmo 則是堆關聯的實體記憶體物件 PMO，PMO 內部可能是好多個記憶體塊所組成的一個連結串列。

/*
 * sys_handle_brk: set up or grow the heap of the current process.
 *
 * "top" below is vmspace->user_current_heap + vmspace->heap_vmr->size.
 *
 *   addr == 0    initialise the heap: create an anonymous PMO, attach it to
 *                the process and build the heap region at
 *                vmspace->user_current_heap; returns that start address.
 *   addr > top   grow the heap region so that it covers `addr` (size rounded
 *                up to a whole page); returns `addr`.
 *   addr == top  nothing to do; returns the heap start address.
 *   addr < top   shrinking is not supported; returns -EINVAL.
 *
 * A non-zero `addr` before the heap has been initialised returns -EINVAL.
 * Error values are negative errno codes converted to u64.
 *
 * User process heap start: 0x600000000000
 *
 * defined in mm/vmregion.c
 */

u64 sys_handle_brk(u64 addr)
{
	struct vmspace *vmspace;
	struct pmobject *pmo;
	struct vmregion *vmr;
	u64 heap_top;
	u64 retval;

	vmspace = obj_get(current_process, VMSPACE_OBJ_ID, TYPE_VMSPACE);

	retval = vmspace->user_current_heap;

	if (addr == 0) {
		/* A zero address asks for the heap to be initialised. */
		pmo = obj_alloc(TYPE_PMO, sizeof(*pmo));
		if (!pmo) {
			retval = -ENOMEM;
			goto error_return;
		}
		pmo_init(pmo, PMO_ANONYM, 0, 0);
		int pmo_cap = cap_alloc(current_process, pmo, 0);
		if (pmo_cap < 0) {
			retval = pmo_cap;
			goto free_obj;
		}

		vmr = init_heap_vmr(vmspace, vmspace->user_current_heap, pmo);
		if (vmr == NULL) {
			retval = -ENOMAPPING;
			goto free_obj;
		}
		vmspace->heap_vmr = vmr;

		retval = vmspace->user_current_heap;
	} else if (vmspace->heap_vmr == NULL) {
		/*
		 * The heap region only exists after a call with addr == 0;
		 * refuse to resize a heap that was never set up instead of
		 * dereferencing a NULL region.
		 * NOTE(review): assumes heap_vmr is NULL until initialised.
		 */
		retval = -EINVAL;
		goto error_return;
	} else {
		heap_top = vmspace->user_current_heap + vmspace->heap_vmr->size;

		if (addr > heap_top) {
			/*
			 * New heap size. Page alignment is not strictly
			 * needed; it over-allocates a little so that a run of
			 * small allocations grows the heap less often.
			 */
			size_t siz = ROUND_UP(addr - vmspace->user_current_heap,
					      PAGE_SIZE);

			/*
			 * Only the usable address range is updated here. The
			 * memory itself is allocated when the first access to
			 * the new range raises a page fault.
			 */
			vmspace->heap_vmr->size = siz;
			vmspace->heap_vmr->pmo->size = siz;
			retval = addr;
		} else if (addr < heap_top) {
			/* Shrinking the heap is not supported for now. */
			retval = -EINVAL;
			goto error_return;
		}
	}

	obj_put(vmspace);
	return retval;

free_obj:
	/* Free the object that was allocated but never used, or it leaks. */
	obj_free(pmo);
error_return:
	obj_put(vmspace);
	return retval;
}

使用者程式執行流程

所有使用者程式的入口都在 user/lib/libmain.S 裡的 START 函式中，該函式負責呼叫 _start_c 函式。_start_c 負責指定程式引數陣列 argv 的地址和程式的環境變數陣列 envp 的地址，然後呼叫使用者定義的 main 函式。我們需要在這之後新增一句 usys_exit(ret) 保證程式正常退出。

/*
 * START: entry point of every user program.
 * Passes the initial stack pointer to _start_c as its argument and aligns
 * the stack before branching there.
 */
BEGIN_FUNC(START)
	mov x29, #0		// clear x29
	mov x30, #0		// clear x30
	mov x0, sp		// arg0 of _start_c: the initial stack pointer
	and sp, x0, #-16	// clear the low 4 bits: 16-byte aligned stack
	b _start_c
END_FUNC(START)
    
/*
 * _start_c: C entry point of a user program.
 * `p` points at the startup data: p[0] is argc, the argument vector starts
 * at p + 1 and the environment vector at p + 3. Runs main and exits with its
 * return value.
 */
void _start_c(long *p)
{
	char **args = (void *)(p + 1);
	char **env = (void *)(p + 3);
	int n_args = p[0];
	int status;

	status = main(n_args, args, env);

	/* Make sure the program terminates properly once main returns. */
	usys_exit(status);
}

處理缺頁異常

先在 handle_entry_c 裡新增幾條跳轉語句。

/*
 * handle_entry_c: C-level exception handler with page-fault support.
 * Dispatches on the exception class taken from `esr`:
 *   - unknown instruction: terminate the current program with -ESUPPORT
 *   - data abort (either DABT class): handle it as a page fault
 *   - anything else: log it
 */
void handle_entry_c(int type, u64 esr, u64 address)
{
	/* ec: exception class */
	const u32 esr_ec = GET_ESR_EL1_EC(esr);

	/* Dispatch exception according to EC */
	if (esr_ec == ESR_EL1_EC_UNKNOWN) {
		kinfo("%s", UNKNOWN);
		/* Indicate this is a ESR_EL1_EC_UNKNOWN */
		sys_exit(-ESUPPORT);
	} else if (esr_ec == ESR_EL1_EC_DABT_LEL ||
		   esr_ec == ESR_EL1_EC_DABT_CEL) {
		do_page_fault(esr, address);
	} else {
		kdebug("Unsupported Exception ESR %lx\n", esr);
	}
}

do_page_fault 裡做一下預處理跳轉到 handle_trans_fault。

/*
 * do_page_fault: pre-process a data abort.
 * Translation faults of any level are passed to handle_trans_fault for the
 * current thread's vmspace; if that fails the program is terminated with the
 * error code. Any other fault status code is treated as a bug.
 * `fault_ins_addr` is not used here.
 */
void do_page_fault(u64 esr, u64 fault_ins_addr)
{
	vaddr_t fault_addr = get_fault_addr();
	int fsc = GET_ESR_EL1_FSC(esr);	/* fault status code */
	int ret;

	if (fsc != DFSC_TRANS_FAULT_L0 && fsc != DFSC_TRANS_FAULT_L1 &&
	    fsc != DFSC_TRANS_FAULT_L2 && fsc != DFSC_TRANS_FAULT_L3) {
		kinfo("do_page_fault: fsc is unsupported (0x%b) now\n", fsc);
		BUG_ON(1);
		return;
	}

	ret = handle_trans_fault(current_thread->vmspace, fault_addr);
	if (ret != 0) {
		kinfo("pgfault at 0x%p failed\n", fault_addr);
		sys_exit(ret);
	}
}

handle_trans_fault 裡幹三件事

1）檢查下 PMO 合法性

2）分配一個頁

3）把頁對映到發生缺頁異常的地址處。

一個容易迷惑的地方：如果分配一個新的頁，再給它對映到別處，那豈不是兩個虛擬地址指向同一個頁了？這裡要注意我們申請頁時是直接從 buddy system 裡申請的空閒的物理頁，此時它只有實體地址。能在核心態裡訪問它是因為核心態下所有的實體地址與虛擬地址都可以通過一個偏移量相互轉換。在申請完後手動把它對映到指定程序的地址空間裡才算是給它分配了虛擬地址。

注意在 make grade 前請保證 CMakeLists.txt 裡的 set(CMAKE_BUILD_TYPE "Release") 為 Release 模式，用 Debug 模式評測會輸出冗餘的 debug 資訊導致評判結果出錯。

/*
 * handle_trans_fault: resolve a translation fault at `fault_addr` by mapping
 * a newly allocated physical page at the page that contains it.
 *
 * Returns 0 on success; -ENOMAPPING when no region covers the address, the
 * region's PMO is not of type PMO_ANONYM, or the mapping fails; -ENOMEM when
 * no physical page could be allocated.
 */
int handle_trans_fault(struct vmspace *vmspace, vaddr_t fault_addr)
{
	struct vmregion *vmr;
	paddr_t pa;
	u64 page_va;
	void *page;
	int ret;

	vmr = find_vmr_for_va(vmspace, fault_addr);
	if (vmr == NULL) {
		kdebug("Couldn't found vmr for va\n");
		return -ENOMAPPING;
	}
	if (vmr->pmo->type != PMO_ANONYM) {
		kdebug("PMO type isn't PMO_ANONYM\n");
		return -ENOMAPPING;
	}

	/*
	 * NOTE(review): the page is mapped exactly as get_pages returns it;
	 * confirm that get_pages hands out zeroed memory.
	 */
	page = get_pages(0);
	if (page == NULL) {
		kdebug("Coundn't get a new page\n");
		/* Running out of pages is an allocation failure, not a missing mapping. */
		return -ENOMEM;
	}

	/* `page` is a kernel virtual address; the mapping needs its physical address. */
	pa = (paddr_t)virt_to_phys(page);
	/* The whole page containing fault_addr is missing, so map from its start. */
	page_va = ROUND_DOWN(fault_addr, PAGE_SIZE);

	ret = map_range_in_pgtbl(vmspace->pgtbl, page_va, pa, PAGE_SIZE,
				 vmr->perm);
	if (ret < 0) {
		free_pages(page);
		kdebug("Map range in pgtbl fault\n");
		return -ENOMAPPING;
	}
	kdebug("page fault success\n");

	return 0;
}

參考資料

Chcore -- 上交IPADS作業系統銀杏書配套Lab實驗筆記 - Lab3程序與異常（二） - 知乎羊男

後記

上次做 ChCore 已經是快兩週前的事情了，最近睡眠質量不好導致老犯困，工作效率提不起來，這幾天得調整下作息加把勁兒把剩下兩個 Lab 做完才行。

ChCore Lab3 使用者程序和異常處理 實驗筆記