1. 程式人生 > 其它 >Linux0.11 paging

Linux0.11 paging

本文修改Linux0.11原始碼,將核心空間的虛擬地址起點設為0xC0000000,使用者虛擬空間為0x00000000-0xC0000000(不含0xC0000000),共3GB。修改後的核心已經在bochs和qemu上測試通過,能成功進入shell。

實現時參考了更高版本的Linux原始碼。以下為完整的修改內容(diff):

diff --git a/Makefile b/Makefile
index 38f9fff..ef2db9f 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ DRIVERS =kernel/blk_drv/blk_drv.a kernel/chr_drv/chr_drv.a
 MATH    =kernel/math/math.a
 LIBS    =lib/lib.a
 
-all: clean Image +all: Image Image: boot/bootsect boot/setup kernel.bin FORCE $(BUILD) boot/bootsect boot/setup kernel.bin Image $(Q)rm -f kernel.bin diff --git a/boot/head.s b/boot/head.s index d2cbe2b..26540f3 100644 --- a/boot/head.s +++ b/boot/head.s @@ -1,84 +1,55 @@ /* * linux/boot/head.s * - * (C) 1991 Linus Torvalds
+ * Copyright (C) 1991, 1992 Linus Torvalds */ +.code32 /* * head.s contains the 32-bit startup code. * * NOTE!!! Startup happens at absolute address 0x00000000, which is also where * the page directory will exist. The startup code will be overwritten by * the page directory. - * - * head被連結至system的開始位置,地址為0
- * */ .text -.globl idt,gdt,pg_dir,tmp_floppy_area -pg_dir: -.globl startup_32 - -/* - * 根據setup模組我們安裝了兩個段描述符,分別是程式碼段和資料段,共8MB的空間 - * 0x10的二進位制為10000, 我們可以看出DPL為0,T為0表示此描述符在全域性描述符中的第二個描述符,許可權為0最高許可權 - * 設定資料段為0x10,全域性描述第2個段 +.globl startup_32,idt,gdt,pg_dir,tmp_floppy_area,floppy_track_buffer +/* + * swapper_pg_dir is the main page directory, address 0x00000000 */ +pg_dir: startup_32: + cld movl $0x10,%eax mov %ax,%ds mov %ax,%es mov %ax,%fs mov %ax,%gs - /* - * 設定棧頂指標 - * - * 設定棧頂指標 - * - * stack_start定義在kernel/sched.c中 - * long user_stack[PAGE_SIZE >> 2]; - * struct { - *   long *a; - *   short b; - * } stack_start { &user_stack, 0x10} - * lss設定了棧頂指標和其棧的段地址,0x10表示資料段,地址為stack_start定義起大小為PAGE_SIZE>>2 = 1024位元組 - * - */ lss stack_start,%esp call setup_idt - call setup_gdt - - /* - * 重新設定資料段,因為在上面的程式中修改gdt,因此需要重新載入, - */ - movl $0x10,%eax # reload all the segment registers - mov %ax,%ds # after changing gdt. CS was already - mov %ax,%es # reloaded in 'setup_gdt' - mov %ax,%fs - mov %ax,%gs - lss stack_start,%esp - /* - * xorl為異或運算,相同為0,不同為1,此處的意思是清零暫存器 - */ + xorl %eax,%eax 1: incl %eax # check that A20 really IS enabled movl %eax,0x000000 # loop forever if it isn't cmpl %eax,0x100000 je 1b + + +/* + * NOTE! 486 should set bit 16, to check for write-protect in supervisor + * mode. Then it would be unnecessary with the "verify_area()"-calls. + * 486 users probably want to set the NE (#5) bit also, so as to use + * int 16 for math errors. + */ + + movl %cr0,%eax # check math chip + andl $0x80000011,%eax # Save PG,PE,ET + /* "orl $0x10020,%eax" here for 486 might be good */ + orl $2,%eax # set MP + movl %eax,%cr0 + call check_x87 + jmp after_page_tables -/* - * NOTE! 486 should set bit 16, to check for write-protect in supervisor - * mode. Then it would be unnecessary with the "verify_area()"-calls. - * 486 users probably want to set the NE (#5) bit also, so as to use - * int 16 for math errors. 
- */ - movl %cr0,%eax # check math chip - andl $0x80000011,%eax # Save PG,PE,ET -/* "orl $0x10020,%eax" here for 486 might be good */ - orl $2,%eax # set MP - movl %eax,%cr0 - call check_x87 - jmp after_page_tables /* * We depend on ET to be correct. This checks for 287/387. @@ -87,79 +58,40 @@ check_x87: fninit fstsw %ax cmpb $0,%al - je 1f /* no coprocessor: have to set bits */ - movl %cr0,%eax - xorl $6,%eax /* reset MP, set EM */ + je 1f + movl %cr0,%eax /* no coprocessor: have to set bits */ + xorl $6,%eax /* reset MP, set EM */ movl %eax,%cr0 ret .align 2 -1: .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ +1: .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ ret /* * setup_idt * * sets up a idt with 256 entries pointing to - * ignore_int, interrupt gates. It then loads - * idt. Everything that wants to install itself - * in the idt-table may do so themselves. Interrupts + * ignore_int, interrupt gates. It doesn't actually load + * idt - that can be done only after paging has been enabled + * and the kernel moved to 0xC0000000. Interrupts * are enabled elsewhere, when we can be relatively * sure everything is ok. This routine will be over- * written by the page tables. 
*/ -/* - * - * setup_idt會將256箇中斷向量設定為ignore_int, lea執行為取有效地址指令 - * - */ setup_idt: - lea ignore_int,%edx /* address of edx */ - movl $0x00080000,%eax /* EAX is interrupt gat Blow 32bit */ - movw %dx,%ax /* selector = 0x0008 = cs */ - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - /* P = 1, DPL=00 D = 1, 32bits */ - /* type = 110 interrupt gate */ + lea ignore_int,%edx + movl $0x00080000,%eax + movw %dx,%ax /* selector = 0x0008 = cs */ + movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - /*********************************************************************************************** - use EAX for interrupt description blow 32bit - Use EDX for interrupt descirpt high 32bit - EAX存放的是中斷描述符的低32位 - EAX存放的是中斷描述符的高32位 - 上述的程式碼是將中斷服務子程式處理成如下格式, CS為8表示二進位制為00001000表示段選擇子為1(程式碼段) - +----------+---+-----------+---+---+-----------+--------+--------------+-----------+----------+ - | H 16bits | P | DPL(2bit) | 0 | D | 3bit type | 3bit 0 | 5bit Reserve | 16bits CS | L 16bits | - +----------+---+-----------+---+---+-----------+--------+--------------+-----------+----------+ - | | 1 | 00 | 0 | 1 | 110 | 000 | | 8 | | - +----------+---+-----------+---+---+-----------+--------+--------------+-----------+----------+ - *************************************************************************************************/ - - lea idt,%edi /*將中斷描述符表的地址存放在edi暫存器中,一般作為目的地址暫存器*/ - mov $256,%ecx /*計數暫存器,一共256項*/ - - /* 如下程式碼將中斷描述符複製到edi暫存器的地址處,並迴圈256次 - * 使用lidt載入idt_descr的中斷描述符 - */ + lea idt,%edi + mov $256,%ecx rp_sidt: - movl %eax,(%edi) /* *edi == eax */ - movl %edx,4(%edi) /* *(edi + 4) = eds */ - addl $8,%edi /* edi = edi + 8, meas next interrupt select descript */ - dec %ecx /* ecs -- */ - jne rp_sidt /* if not 0 goto rp_sidt */ - lidt idt_descr /* load interrupt select descript */ - ret - -/* - * setup_gdt - * - * This routines sets up a new gdt and loads it. - * Only two entries are currently built, the same - * ones that were built in init.s. 
The routine - * is VERY complicated at two whole lines, so this - * rather long comment is certainly needed :-). - * This routine will beoverwritten by the page tables. - */ -setup_gdt: - lgdt gdt_descr + movl %eax,(%edi) + movl %edx,4(%edi) + addl $8,%edi + dec %ecx + jne rp_sidt ret /* @@ -180,6 +112,8 @@ pg2: pg3: .org 0x5000 + + /* * tmp_floppy_area is used by the floppy-driver when DMA cannot * reach to a buffer-block. It needs to be aligned, so that it isn't @@ -187,28 +121,41 @@ pg3: */ tmp_floppy_area: .fill 1024,1,0 - -/* - * 程式執行到此處時會呼叫了setup_paging在setup_paging返回時由於將main函式進行壓堆疊將執行main函式 - * L6的地址為main函式的返回地址 - * $1, $2, $3為main函式的引數 +/* + * floppy_track_buffer is used to buffer one track of floppy data: it + * has to be separate from the tmp_floppy area, as otherwise a single- + * sector read/write can mess it up. It can contain one full track of + * data (18*2*512 bytes). */ +floppy_track_buffer: + .fill 512*2*18,1,0 + after_page_tables: - pushl $1 # These are the parameters to main :-) - pushl $2 - pushl $3 - pushl $L6 # return address for main, if it decides to. - pushl $main - jmp setup_paging + call setup_paging + lgdt gdt_descr + lidt idt_descr + ljmp $0x08,$1f +1: movl $0x10,%eax # reload all the segment registers + mov %ax,%ds # after changing gdt. + mov %ax,%es + mov %ax,%fs + mov %ax,%gs + lss stack_start,%esp + pushl $0 # These are the parameters to main :-) + pushl $0 + pushl $0 + cld # gcc2 wants the direction flag cleared at all times + call main L6: jmp L6 # main should never return here, but - # just in case, we know what happens. + # just in case, we know what happens. 
/* This is the default interrupt "handler" :-) */ int_msg: .asciz "Unknown interrupt\n\r" .align 2 ignore_int: + cld pushl %eax pushl %ecx pushl %edx @@ -257,152 +204,64 @@ ignore_int: */ .align 2 setup_paging: - movl $1024*5,%ecx /* 5 pages - pg_dir+4 page tables */ - xorl %eax,%eax /* EAX is 0 */ - xorl %edi,%edi /* pg_dir is at 0x000, EDI is 0, start address*/ - cld;rep;stosl /* stosl *EDI = EAX */ - /********************************************************************** - 以上程式碼的意思是從0-1024*5的地址空間清零,由於此時0地址處的程式碼已經執行完畢沒有什麼用了 - 為什麼是1024*5的長度呢,我們在上面的程式碼中可以看到.org 0x1000 pg0: 從0x1000為第一個頁表一共建立了4個 - 在加上0地址到0x1000一共5個 - 問題1:分段是怎麼用的? - 答:執行發出邏輯地址後經分段部件形成線性地址,根據前面的分析我們支援邏輯地址等於線性地址 - 問題2:分頁是怎麼進行地址對映的? - 答:分頁機制將線性地址分成三個部分進行查表,高10位表示頁表目錄,中10位表示頁表項,低12位表示偏移,在定址時 - 根據高10位找到頁表目錄,頁表目錄存放了頁表的起始地址,根據中10位在頁面中找到對應的物理頁,在加上低12位的偏移形成實體地址 - 頁表目錄和頁表的格式如下(頁表目錄和頁表是4KB對齊的): - +----------------------------------------------+---------------------+ - |頁表目錄或者頁表實體地址的高位BIT(12-31) | 7 | - +----------------------------------------------+---------------------+ - 由於頁表目錄和頁表以4KB對齊,因此我們不用關心其低12位,低12位有其特殊的意義,具體可查詢intel的資料手冊 - - pg_dir表示頁表的地址,7表示為該頁存在使用者可讀可寫 - 由於目前系統的最大記憶體為16MB,因此只用到也變目錄的前4項(為什麼) - 一個頁表管理1024項4KB的地址空間既4MB,那麼16MB需要多少個頁表目錄呢,答案是4個 - ***********************************************************************/ - + movl $1024*5,%ecx /* 5 pages - swapper_pg_dir+4 page tables */ + xorl %eax,%eax + xorl %edi,%edi /* swapper_pg_dir is at 0x000 */ + cld;rep;stosl + + /* Identity-map the kernel in low 4MB memory for ease of transition */ movl $pg0+7,pg_dir /* set present bit/user r/w */ - movl $pg1+7,pg_dir+4 /* --------- " " --------- */ - movl $pg2+7,pg_dir+8 /* --------- " " --------- */ - movl $pg3+7,pg_dir+12 /* --------- " " --------- */ - - /* - 上面的程式碼執行完畢後記憶體如下 - --起始地址0x0000 - 0x00001 007 - 0x00002 007 - 0x00003 007 - 0x00004 007 - 一共四項頁表目錄 - */ - - movl $pg3+4092,%edi /* end of this page */ - movl $0xfff007,%eax /* 16Mb - 4096 + 7 (r/w user,p) */ - std /* derection -4 */ -1: stosl /* fill pages 
backwards - more efficient :-) *EDI=EAS */ + movl $pg1+7,pg_dir+4 /* set present bit/user r/w */ + movl $pg2+7,pg_dir+8 /* set present bit/user r/w */ + movl $pg3+7,pg_dir+12 /* set present bit/user r/w */ + + /* But the real place is at 0xC0000000 */ + movl $pg0+7,pg_dir+3072 /* set present bit/user r/w */ + movl $pg1+7,pg_dir+3076 /* --------- " " --------- */ + movl $pg2+7,pg_dir+3080 /* --------- " " --------- */ + movl $pg3+7,pg_dir+3084 /* --------- " " --------- */ + + movl $pg3+4092,%edi + movl $0xfff007,%eax /* 16Mb - 4096 + 7 (r/w user,p) */ + std +1: stosl /* fill pages backwards - more efficient :-) */ subl $0x1000,%eax jge 1b cld - /* - 以上的程式碼倒序從pg3的最後一項開始填充直到eax為0,這樣填充下來後如下圖 - 頁表目錄 - -------------------------0x0000 - 0x00001 007 - -------------------------0x0004 - 0x00002 007 - -------------------------0x0008 - 0x00003 007 - -------------------------0x000C - 0x00004 007 - ... - ... - ... -   pg0頁表0 - -------------------------0x1000 - 0x00000 007 - -------------------------0x1004 - 0x00001 007 - -------------------------0x1008 - ... - ... - ... -   pg1頁表1 - -------------------------0x2000 - 0x00400 007 - -------------------------0x2004 - 0x00401 007 - -------------------------0x2008 - ... - ... - ... -   pg2頁表2 - -------------------------0x3000 - 0x00800 007 - -------------------------0x3004 - 0x00801 007 - -------------------------0x3008 - ... - ... - ... -   pg3頁表3 - -------------------------0x4000 - 0x00c00 007 - -------------------------0x4004 - 0x00c01 007 - -------------------------0x4008 - ... - ... - ... 
- -------------------------0x4ff4 - 0x00ffd 007 - -------------------------0x4ff8 - 0x00ffe 007 - -------------------------0x4ffc - 0x00fff 007 - -------------------------0x5000 - 我們舉一個例子,如果要訪問0x00401555地址處的資料,我們知道系統需先經過段對映由於段對映後線性地址和邏輯地址一樣 - 因此訪問的地址還是0x00401555,段對映後需要進行頁對映,我們根據上面的問題2對地址進行分解 - 0x00401555的高10位為1尋找第一個頁表目錄值為0x00002 007,中10位為1在頁表1中的第一項值為0x00401 007,低12位為555, - 根據0x00401 007去掉007,再加上555,最終的地址為0x00401555,我們發現饒了一圈邏輯地址和實體地址一樣 - 這種對映方式成為平坦對映 - - 我們再來一個例子,地址0x00FFF111地址,我們根據規則將其分解 - 高10位為0000000011即3,第三項頁面目錄項其對應的頁面項為4,對應的地址為3*4=12, 0x0c - 中10位為1111111111即1023,每一個頁面佔4個地址即1023*4 = 4092 = 0xFFC - 低12位為111 - 如上面藍色字型的對映過程,其最終地址還是0x00fff111 - 我們可以看出頁管理後系統還是平坦對映,從0地址開始一共16MB - */ - - - xorl %eax,%eax /* pg_dir is at 0x0000 */ + xorl %eax,%eax /* swapper_pg_dir is at 0x0000 */ movl %eax,%cr3 /* cr3 - page directory start */ movl %cr0,%eax orl $0x80000000,%eax movl %eax,%cr0 /* set paging (PG) bit */ ret /* this also flushes prefetch-queue */ -.align 2 +/* + * The interrupt descriptor table has room for 256 idt's + */ +.align 4 .word 0 idt_descr: .word 256*8-1 # idt contains 256 entries - .long idt -.align 2 -.word 0 -gdt_descr: - .word 256*8-1 # so does gdt (not that that's any - .long gdt # magic number, but it works for me :^) + .long 0xc0000000+idt - .align 8 -idt: .fill 256,8,0 # idt is uninitialized +.align 4 +idt: + .fill 256,8,0 # idt is uninitialized /* - * 第二次的全域性描述符地址,這個地址是從0地址開始的, 一共16MB的空間 + * The real GDT is also 256 entries long - no real reason */ -gdt: - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00c09a0000000fff /* 16Mb */ - .quad 0x00c0920000000fff /* 16Mb */ +.align 4 +.word 0 +gdt_descr: + .word 256*8-1 + .long 0xc0000000+gdt + +.align 4 +gdt: + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0xc0c09a0000000fff /* 16Mb at 0xC0000000 */ + .quad 0xc0c0920000000fff /* 16Mb */ .quad 0x0000000000000000 /* TEMPORARY - don't use */ - .fill 252,8,0 /* space for LDT's and TSS's etc */ + .fill 252,8,0 /* space for LDT's and TSS */ diff 
--git a/fs/exec.c b/fs/exec.c index 0c4aac8..a3f3fc7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -191,32 +191,27 @@ static unsigned long copy_strings(int argc,char ** argv,unsigned long *page, return p; } +#define TASK_SIZE (0xC0000000) + static unsigned long change_ldt(unsigned long text_size,unsigned long * page) { unsigned long code_limit,data_limit,code_base,data_base; int i; - - /* code_limit不夠一頁則佔一頁 - * - */ - code_limit = text_size+PAGE_SIZE -1; - code_limit &= 0xFFFFF000; - data_limit = 0x4000000; - /* 獲取當前程式的基地址,並設定資料和程式碼的基地址一樣 - * - */ - code_base = get_base(current->ldt[1]); - data_base = code_base; - - /* 基地址和execv執行前一樣,但是limit變成了新的程式的大小 - * - */ + + code_limit = TASK_SIZE; + data_limit = TASK_SIZE; + code_base = data_base = 0; + current->start_code = code_base; +#ifdef K_DEBUG + printk("%s-%d code base %p code limit %p\n", __func__, __LINE__, code_base, code_limit); +#endif set_base(current->ldt[1],code_base); set_limit(current->ldt[1],code_limit); set_base(current->ldt[2],data_base); set_limit(current->ldt[2],data_limit); + -/* make sure fs points to the NEW data segment */ + /* make sure fs points to the NEW data segment */ __asm__("pushl $0x17\n\tpop %%fs"::); /* 指向64MB的最後一個位元組 @@ -225,15 +220,21 @@ static unsigned long change_ldt(unsigned long text_size,unsigned long * page) data_base += data_limit; for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) { + /* * data_base 64MB的地址第一個頁 */ data_base -= PAGE_SIZE; + /* 如果page[i]有效,也就是引數有效 * 則將此物理頁對映到data_base地址處 */ - if (page[i]) - put_page(page[i],data_base); + if (page[i]) { +#ifdef K_DEBUG + printk("%s-%d put_page %p to %p\n", __func__, __LINE__, page[i], data_base); +#endif + put_page(page[i], data_base); + } } /* * 返回64M @@ -402,7 +403,6 @@ restart_interp: goto exec_error2; } if (N_TXTOFF(ex) != BLOCK_SIZE) { - printk("%s: N_TXTOFF != BLOCK_SIZE. 
See a.out.h.", filename); retval = -ENOEXEC; goto exec_error2; } @@ -426,8 +426,7 @@ restart_interp: if ((current->close_on_exec>>i)&1) sys_close(i); current->close_on_exec = 0; - free_page_tables(get_base(current->ldt[1]),get_limit(0x0f)); - free_page_tables(get_base(current->ldt[2]),get_limit(0x17)); + clear_page_tables(current); if (last_task_used_math == current) last_task_used_math = NULL; current->used_math = 0; @@ -441,6 +440,10 @@ restart_interp: current->brk = ex.a_bss + (current->end_data = ex.a_data + (current->end_code = ex.a_text)); +#ifdef K_DEBUG + printk("%s-%d end_code is %x, end_data is %x ex.a_entry %x\n", + __func__, __LINE__, current->end_code, current->end_data, ex.a_entry); +#endif current->start_stack = p & 0xfffff000; current->euid = e_uid; current->egid = e_gid; diff --git a/include/asm/system.h b/include/asm/system.h index 9f843fd..c137f4e 100644 --- a/include/asm/system.h +++ b/include/asm/system.h @@ -213,7 +213,7 @@ __asm__ ("movw $104,%1\n\t" \ "movb $0x00,%5\n\t" \ "movb %%ah,%6\n\t" \ "rorl $16,%%eax" \ - ::"a" (addr), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \ + ::"a" (addr+0xc0000000), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \ "m" (*(n+5)), "m" (*(n+6)), "m" (*(n+7)) \ ) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a160f3..87c7abc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5,6 +5,10 @@ extern unsigned long get_free_page(void); extern unsigned long put_page(unsigned long page,unsigned long address); +extern unsigned long put_dirty_page(unsigned long page,unsigned long address); extern void free_page(unsigned long addr); - +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif #endif + diff --git a/include/linux/sched.h b/include/linux/sched.h index 50ae3bc..90d07d5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -26,8 +26,10 @@ #define NULL ((void *) 0) #endif -extern int copy_page_tables(unsigned long from, unsigned long to, long size); -extern int free_page_tables(unsigned long from, 
unsigned long size); +extern int copy_page_tables(struct task_struct *tsk); +extern int free_page_tables(struct task_struct *tsk); +extern void clear_page_tables(struct task_struct * tsk); +extern void show_mem(void); extern void sched_init(void); extern void schedule(void); @@ -125,8 +127,8 @@ struct task_struct { /* filp */ {NULL,}, \ { \ {0,0}, \ -/* ldt */ {0x9f,0xc0fa00}, \ - {0x9f,0xc0f200}, \ +/* ldt */ {0xfff,0xc0c0fa00}, \ + {0xfff,0xc0c0f200} \ }, \ /*tss*/ {0,PAGE_SIZE+(long)&init_task,0x10,0,0,0,0,(long)&pg_dir,\ 0,0,0,0,0,0,0,0, \ @@ -181,7 +183,7 @@ __asm__("str %%ax\n\t" \ * This also clears the TS-flag if the task we switched to has used * tha math co-processor latest. */ -//#define CONFIG_TASK_TSS 1 +#define CONFIG_TASK_TSS 1 #ifdef CONFIG_TASK_TSS #define switch_to(n) {\ struct {long a,b;} __tmp; \ diff --git a/init/main.c b/init/main.c index cd0b564..f30b861 100644 --- a/init/main.c +++ b/init/main.c @@ -173,6 +173,7 @@ void main(int __a, int __b, int __c) /* This really IS void, no error here. 
*/ printk("kernel fp init\n"); floppy_init(); printk("kernel move to user\n"); + show_mem(); /* * sti允許中斷 * @@ -262,43 +263,74 @@ static char * envp[] = { "HOME=/usr/root", NULL }; void init(void) { int pid,i; + int j = 0; setup((void *) &drive_info); (void) open("/dev/tty0",O_RDWR,0); (void) dup(0); (void) dup(0); - printf("init current pid is %d\n", getpid()); printf("%d buffers = %d bytes buffer space\n\r",NR_BUFFERS, NR_BUFFERS*BLOCK_SIZE); printf("Free mem: %d bytes\n\r",memory_end-main_memory_start); - + +#if 0 if (!(pid=fork())) { - printf("init fork current pid is %d\n", getpid()); + printf("%s-%d this is task %d\n", __func__, __LINE__, getpid()); close(0); if (open("/etc/rc",O_RDONLY,0)) _exit(1); execve("/bin/sh",argv_rc,envp_rc); _exit(2); } - if (pid>0) - while (pid != wait(&i)) + printf("%s-%d this is task %d\n", __func__, __LINE__, getpid()); + if (pid>0) { + while (pid != wait(&i)); + } +#endif +#if 0 + printf("%s-%d this is task %d\n", __func__, __LINE__, getpid()); + + pid = fork(); + if (pid < 0) { + printf("fork failed\n"); + return; + } - /* nothing */; + if (!pid) { + printf("this is a test pid %d\n", getpid()); + _exit(2); + } + + if (pid > 0) { + int j = wait(&i); + printf("jjjjjjjjjjjjjjjjjj is %d\n", j); + + } +#endif + /* nothing */; while (1) { - printf("start enter init while(1)\n"); if ((pid=fork())<0) { - printf("init while(1) pid is %d\n", getpid()); printf("Fork failed in init\r\n"); continue; } if (!pid) { - printf("while1 fork current pid is %d\n", getpid()); + printf("%s-%d this is task %d\n", __func__, __LINE__, getpid()); close(0);close(1);close(2); setsid(); (void) open("/dev/tty0",O_RDWR,0); (void) dup(0); (void) dup(0); - _exit(execve("/bin/sh",argv,envp)); + int t = execve("/bin/sh",argv,envp); + printf("dddddddddddddddddddd\n"); + printf("dddddddddddddddddddd\n"); + printf("dddddddddddddddddddd\n"); + printf("dddddddddddddddddddd\n"); + printf("dddddddddddddddddddd\n"); + printf("dddddddddddddddddddd\n"); + 
printf("dddddddddddddddddddd\n"); + printf("dddddddddddddddddddd\n"); + printf("dddddddddddddddddddd\n"); + _exit(t); } while (1) if (pid == wait(&i)) @@ -306,5 +338,7 @@ void init(void) printf("\n\rchild %d died with code %04x\n\r",pid,i); sync(); } + printf("DDDDDDDDDDDDDD\n"); _exit(0); /* NOTE! _exit, not exit() */ } + diff --git a/kernel/chr_drv/tty_io.c b/kernel/chr_drv/tty_io.c index ed14fa8..6d3a927 100644 --- a/kernel/chr_drv/tty_io.c +++ b/kernel/chr_drv/tty_io.c @@ -66,7 +66,7 @@ struct tty_struct tty_table[] = { {0, /* no translation */ 0, /* no translation */ B2400 | CS8, - 0, + ECHO, 0, INIT_C_CC}, 0, @@ -79,7 +79,7 @@ struct tty_struct tty_table[] = { {0, /* no translation */ 0, /* no translation */ B2400 | CS8, - 0, + ECHO, 0, INIT_C_CC}, 0, diff --git a/kernel/exit.c b/kernel/exit.c index b22de34..e3b009d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -102,8 +102,8 @@ static void tell_father(int pid) int do_exit(long code) { int i; - free_page_tables(get_base(current->ldt[1]),get_limit(0x0f)); - free_page_tables(get_base(current->ldt[2]),get_limit(0x17)); + + free_page_tables(current); for (i=0 ; i<NR_TASKS ; i++) if (task[i] && task[i]->father == current->pid) { task[i]->father = 1; @@ -128,7 +128,13 @@ int do_exit(long code) kill_session(); current->state = TASK_ZOMBIE; current->exit_code = code; +#ifdef K_DEBUG + printk("%s-%d state is %d code is %d current %d father %d start\n", __func__, __LINE__, current->state, code, current->pid, current->father); +#endif tell_father(current->father); +#ifdef K_DEBUG + printk("%s-%d state is %d code is %d current %d father %d end\n", __func__, __LINE__, current->state, code, current->pid, current->father); +#endif schedule(); return (-1); /* just to suppress warnings */ } @@ -142,7 +148,9 @@ int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options) { int flag, code; struct task_struct ** p; - +#ifdef K_DEBUG + printk("%s-%d syswaitpid %d\n", __func__, __LINE__, current->pid); +#endif 
verify_area(stat_addr,4); repeat: flag=0; @@ -185,8 +193,9 @@ repeat: return 0; current->state=TASK_INTERRUPTIBLE; schedule(); - if (!(current->signal &= ~(1<<(SIGCHLD-1)))) + if (!(current->signal &= ~(1<<(SIGCHLD-1)))) { goto repeat; + } else return -EINTR; } diff --git a/kernel/fork.c b/kernel/fork.c index 25e3207..bf653d7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,40 +42,27 @@ int copy_mem(int nr,struct task_struct * p) unsigned long old_data_base,new_data_base,data_limit; unsigned long old_code_base,new_code_base,code_limit; - /* - * ¶Áȡϵͳµ±ǰµľֲ¿èʶ·ûµĴú«¶γ¤¶Ⱥ͊ý¾ݶ㤶Ȋ- */ - code_limit=get_limit(0x0f); - data_limit=get_limit(0x17); - - /* - * ¶Áȡµ±ǰ½ø³̵ľֲ¿èʶ·ûµĴú«¶γ¤¶Ⱥ͊ý¾ݶ㤶Ȋ- */ + code_limit = get_limit(0x0f); + data_limit = get_limit(0x17); old_code_base = get_base(current->ldt[1]); old_data_base = get_base(current->ldt[2]); - /* - * ΒÇ֪µÀ´ú«¶κ͊ý¾ݶεĻùµؖ·ʇһѹµĊ- * - */ - if (old_data_base != old_code_base) + if (old_data_base != old_code_base) { + printk("ldt[0]: %08x %08x\n",current->ldt[0].a,current->ldt[0].b); + printk("ldt[1]: %08x %08x\n",current->ldt[1].a,current->ldt[1].b); + printk("ldt[2]: %08x %08x\n",current->ldt[2].a,current->ldt[2].b); panic("We don't support separate I&D"); - /* - * ȧ¹ûʽ¾ݶεij¤¶Ȑ¡Ӛ´ú«¶εij¤¶Ȓ²²»А - */ + } if (data_limit < code_limit) panic("Bad data_limit"); - new_data_base = new_code_base = nr * 0x4000000; + new_data_base = old_data_base; + new_code_base = old_code_base; p->start_code = new_code_base; set_base(p->ldt[1],new_code_base); set_base(p->ldt[2],new_data_base); - if (copy_page_tables(old_data_base,new_data_base,data_limit)) { - printk("free_page_tables: from copy_mem\n"); - free_page_tables(new_data_base,data_limit); - return -ENOMEM; - } - return 0; + return copy_page_tables(p); } + extern void first_return_from_kernel(void); /* @@ -100,6 +87,8 @@ int copy_process(int nr,long ebp,long edi,long esi,long gs,long none, return -EAGAIN; task[nr] = p; + + // NOTE!: the following statement now work with gcc 4.3.2 now, and 
you // must compile _THIS_ memcpy without no -O of gcc.#ifndef GCC4_3 /* ȧ¹ûʹӃÁˠmemcpy º¯ʽ£¬ҲΪtask_structʇ¸öÁªºό廡¿½±´¶ѕ»ʽ¾݊@@ -169,6 +158,7 @@ int copy_process(int nr,long ebp,long edi,long esi,long gs,long none, #endif + if (last_task_used_math == current) __asm__("clts ; fnsave %0"::"m" (p->tss.i387)); if (copy_mem(nr,p)) { @@ -185,9 +175,13 @@ int copy_process(int nr,long ebp,long edi,long esi,long gs,long none, current->root->i_count++; if (current->executable) current->executable->i_count++; + set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss)); set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&(p->ldt)); p->state = TASK_RUNNING; /* do this last, just in case */ +#ifdef K_DEBUG + printk("%s-%d set task %d state to running father %d\n", __func__, __LINE__, p->pid, p->father); +#endif return last_pid; } diff --git a/kernel/printk.c b/kernel/printk.c index fe84b2b..2a60a9b 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -34,7 +34,7 @@ int printk(const char *fmt, ...) { va_list args; int i; - int j; + int j = 0; va_start(args, fmt); j=timestamp("[%010d] ", jiffies); diff --git a/kernel/sched.c b/kernel/sched.c index 68cc50c..c1654c6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -143,9 +143,29 @@ void schedule(void) (*p)->counter = ((*p)->counter >> 1) + (*p)->priority; } + #ifndef CONFIG_TASK_TSS switch_to_by_stack((long)pnext, (long)(_LDT(next))); #else +#if 0 + printk("%s switch to %d\n", __func__, next); + if (next == 1) { + unsigned long dir = task[next]->tss.cr3; + unsigned long *page_dir; + unsigned long page_table; + printk("task1 cr3 is %p\n", dir); + page_dir = (unsigned long *) dir; + for (i = 0 ; i < 1024 ; i++,page_dir++) { + page_table = *page_dir; + if (!page_table) + continue; + if (!(1 & page_table)) { + continue; + } + printk("xxxxxxxxxxx page_table reserve %p\n", page_table); + } + } +#endif switch_to(next); #endif } diff --git a/kernel/traps.c b/kernel/traps.c index ce10949..25830d9 100644 --- a/kernel/traps.c +++ b/kernel/traps.c @@ -36,6 
+36,22 @@ register unsigned short __res; \ __asm__("mov %%fs,%%ax":"=a" (__res):); \ __res;}) +#define _ss() ({ \ + register unsigned short __res; \ + __asm__("mov %%ss,%%ax":"=a" (__res):); \ + __res;}) + +#define _cs() ({ \ + register unsigned short __res; \ + __asm__("mov %%cs,%%ax":"=a" (__res):); \ + __res;}) + +#define _ds() ({ \ + register unsigned short __res; \ + __asm__("mov %%ds,%%ax":"=a" (__res):); \ + __res;}) + + int do_exit(long code); void page_exception(void); @@ -65,11 +81,16 @@ static void die(char * str,long esp_ptr,long nr) long * esp = (long *) esp_ptr; int i; + printk("esp is %p %p %p %p %p %p\n", esp, esp[0], esp[1], esp[2], esp[3], esp[4]); printk("%s: %04x\n\r",str,nr&0xffff); printk("EIP:\t%04x:%p\nEFLAGS:\t%p\nESP:\t%04x:%p\n", esp[1],esp[0],esp[2],esp[4],esp[3]); printk("fs: %04x\n",_fs()); - printk("base: %p, limit: %p\n",get_base(current->ldt[1]),get_limit(0x17)); + printk("ss: %04x\n",_ss()); + printk("cs: %04x\n",_cs()); + printk("ds: %04x\n",_ds()); + printk("base: %p, limit: %p\n",get_base(current->ldt[1]),get_limit(0x0f)); + printk("data: %p, limit: %p\n",get_base(current->ldt[2]),get_limit(0x17)); if (esp[4] == 0x17) { printk("Stack: "); for (i=0;i<4;i++) diff --git a/mm/memory.c b/mm/memory.c index c6c6ccb..e451c32 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -35,17 +35,22 @@ static inline void oom(void) } #define invalidate() \ -__asm__("movl %%eax,%%cr3"::"a" (0)) +__asm__ __volatile__("movl %%cr3,%%eax\n\tmovl %%eax,%%cr3":::"ax") + /* these are not to be changed without changing head.s etc */ -#define LOW_MEM 0x100000 -#define PAGING_MEMORY (15*1024*1024) + +#define PAGING_MEMORY (16*1024*1024) #define PAGING_PAGES (PAGING_MEMORY>>12) -#define MAP_NR(addr) (((addr)-LOW_MEM)>>12) -#define USED 100 +#define MAP_NR(addr) (((unsigned long)(addr))>>12) +#define USED (1<<7) +#define PAGE_DIRTY 0x40 +#define PAGE_ACCESSED 0x20 +#define PAGE_USER 0x04 +#define PAGE_RW 0x02 +#define PAGE_PRESENT 0x01 +#define PAGE_SHIFT 12 
-#define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \ -current->start_code + current->end_code) static long HIGH_MEMORY = 0; static long total_pages = 0; @@ -84,7 +89,7 @@ __asm__("std ; repne ; scasb\n\t" " movl %%edx,%%eax\n" "1: cld" :"=a" (__res) - :"0" (0),"i" (LOW_MEM),"c" (PAGING_PAGES), + :"0" (0),"i" (0),"c" (PAGING_PAGES), "D" (mem_map+PAGING_PAGES-1) ); return __res; @@ -99,335 +104,238 @@ return __res; */ void free_page(unsigned long addr) { - if (addr < LOW_MEM) { - return; - } + int i = 0; + if (addr >= HIGH_MEMORY) { panic("trying to free nonexistent page"); } - /* - * 實體記憶體減去低端記憶體然後除以4KB得到物理頁號, - * 獲取頁時mem_map[addr]設定為1,因此釋放時mem_map[addr]--為0,從而釋放頁 - * 這裡多了一個判斷是因為此頁可能被共享,如果是共享的頁只是減少計數 - * 當計數為0才真正的釋放頁 - * - */ - addr -= LOW_MEM; - addr >>= 12; - if (mem_map[addr]--) { + + i = MAP_NR(addr); + + if (mem_map[i] & USED) { + printk("system reserve mem, ignore free\n"); return; } - mem_map[addr] = 0; - panic("trying to free free page"); + + if (!mem_map[i]) { + panic("trying to free free page"); + } + + mem_map[i]--; + return; } -/* - * This function frees a continuos block of page tables, as needed - * by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks. 
- * - * 釋放頁表連續的記憶體塊 - * 根據線性地址和長度,釋放對應記憶體頁表所制定的記憶體塊並設定表項空閒 - * 該函式處理4MB的記憶體塊 - * - */ -int free_page_tables(unsigned long from,unsigned long size) +static void free_one_table(unsigned long * page_dir) { - unsigned long *pg_table; - unsigned long * dir, nr; - /* - * 釋放的記憶體塊地址需要以4MB為邊界 - * - */ - if (from & 0x3fffff) { - panic("free_page_tables called with wrong alignment"); - } - /* - * 0地址為核心和核心緩衝區,不能釋放 - * - */ - if (!from) { - panic("Trying to free up swapper memory space"); + int j; + unsigned long pg_table = *page_dir; + unsigned long * page_table; + + if (!pg_table) + return; + + if (pg_table >= HIGH_MEMORY|| !(pg_table & 1)) { + printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table); + *page_dir = 0; + return; } - /* - * 計算所佔頁表目錄項數, - * 一個頁面目錄項大小為4MB, 一共1024個頁表目錄項,共4GB空間 - * - * size + 0x3fffff表示將size擴大到4MB然後對齊 - * 經過這樣的計算size表示的是需要釋放的頁表目錄數量 - * - */ - size = (size + 0x3fffff) >> 22; - - /* - * 計算from所在的頁面目錄項, - * 目錄項索引為form >> 22, - * 由於每項佔用4個位元組,並且頁目錄是從0地址開始, - * 因此實際的目錄項地址為(from >> 22) << 2即from >> 20 - * - */ - dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */ - - /* - * 主迴圈是頁表目錄項 - * size表示有多少個頁表目錄項 - * dir表示頁表目錄項的地址地址,每加一次就是下一個頁表目錄項 - * - */ - for ( ; size-- > 0; dir++) { - /* - * 如果該頁表目錄無效繼續,頁表目錄的低12位有特殊的意義,在計算 - * 頁表時低12位可全部看作0,因為頁表目錄,頁表,頁都要求4KB對齊 - * - */ - if (!(1 & *dir)) { + *page_dir = 0; + if (mem_map[MAP_NR(pg_table)] & USED) { + return; + } + + page_table = (unsigned long *) (pg_table & 0xfffff000); + for (j = 0 ; j < 1024 ; j++,page_table++) { + unsigned long pg = *page_table; + + if (!pg) continue; - } - /* - * 取頁表的實體地址 - * - */ - pg_table = (unsigned long *) (0xfffff000 & *dir); - - /* - * 每一個頁表有1024個頁,此處是迴圈釋放1024個頁 - * - */ - for (nr=0; nr<1024; nr++) { - /* - * 如果此頁有效,則釋放此頁 - * 0xfffff000 & *pg_table 表示頁的實體地址 - * - * - */ - if (1 & *pg_table) { - free_page(0xfffff000 & *pg_table); - } - /* - * 此頁表項清零,指向下一個頁表項 - * - */ - *pg_table = 0; - pg_table++; - } - /* 由於一個頁表也佔用了一個頁, - * 當頁表裡面的頁釋放完成後,釋放此頁表 - * 0xfffff000 & *dir 
表示頁表的實體地址 - * - * - */ - free_page(0xfffff000 & *dir); - *dir = 0; + *page_table = 0; + if (1 & pg) + free_page(0xfffff000 & pg); + } + free_page(0xfffff000 & pg_table); +} + +void clear_page_tables(struct task_struct * tsk) +{ + int i; + unsigned long * page_dir; + unsigned long tmp; + + if (!tsk) + return; + if (tsk == task[0]) + panic("task[0] (swapper) doesn't support exec() yet\n"); + + + page_dir = (unsigned long *) tsk->tss.cr3; + if (!page_dir) { + printk("Trying to clear kernel page-directory: not good\n"); + return; } + for (i = 0 ; i < 768 ; i++,page_dir++) + free_one_table(page_dir); + + //tmp = (unsigned long)(page_dir + 768); invalidate(); - return 0; + return; } + /* - * Well, here is one of the most complicated functions in mm. It - * copies a range of linerar addresses by copying only the pages. - * Let's hope this is bug-free, 'cause this one I don't want to debug :-) - * - * Note! We don't copy just any chunks of memory - addresses have to - * be divisible by 4Mb (one page-directory entry), as this makes the - * function easier. It's used only by fork anyway. - * - * NOTE 2!! When from==0 we are copying kernel space for the first - * fork(). Then we DONT want to copy a full page-directory entry, as - * that would lead to some serious memory waste - we just copy the - * first 160 pages - 640kB. Even that is more than we need, but it - * doesn't take any more memory - we don't copy-on-write in the low - * 1 Mb-range, so the pages can be shared with the kernel. Thus the - * special case for nr=xxxx. + * This function frees up all page tables of a process when it exits. 
*/ -int copy_page_tables(unsigned long from, unsigned long to, long size) +int free_page_tables(struct task_struct * tsk) { - unsigned long * from_page_table; - unsigned long * to_page_table; - unsigned long this_page; - unsigned long * from_dir, * to_dir; - unsigned long nr; + int i; + unsigned long pg_dir; + unsigned long * page_dir; - /* - * 源地址和目的地址都是要在4M的邊界上 - * - */ - if ((from&0x3fffff) || (to&0x3fffff)) { - panic("copy_page_tables called with wrong alignment"); + if (!tsk) + return 1; + if (tsk == task[0]) { + printk("task[0] (swapper) killed: unable to recover\n"); + panic("Trying to free up swapper memory space"); } + pg_dir = tsk->tss.cr3; + if (!pg_dir) { + printk("Trying to free kernel page-directory: not good\n"); + return 1; + } + tsk->tss.cr3 = (unsigned long) 0; + if (tsk == current) + __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3)); + page_dir = (unsigned long *) pg_dir; + for (i = 0 ; i < 1024 ; i++,page_dir++) + free_one_table(page_dir); + free_page(pg_dir); + invalidate(); + return 0; +} - /* - * 獲取源頁表目錄項和目的頁表目錄項的地址指標,以及大小size - * 如果看不懂,請看free_page_tables函式的說明 - * - * - */ - from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */ - to_dir = (unsigned long *) ((to>>20) & 0xffc); - size = ((unsigned) (size+0x3fffff)) >> 22; - - for( ; size-->0 ; from_dir++,to_dir++) { - /* - * 如果目的目錄項的頁表已經存在,則出錯 - * - */ - if (1 & *to_dir) { - panic("copy_page_tables: already exist"); - } - /* - * 如果源目錄項的頁表不存在,則忽略 - */ - if (!(1 & *from_dir)) { + +/* + * copy_page_tables() just copies the whole process memory range: + * note the special handling of RESERVED (ie kernel) pages, which + * means that they are always shared by all processes. 
+ */ +int copy_page_tables(struct task_struct * tsk) +{ + int i; + int c = 0; + unsigned long old_pg_dir, *old_page_dir; + unsigned long new_pg_dir, *new_page_dir; + + old_pg_dir = current->tss.cr3; + new_pg_dir = get_free_page(); + if (!new_pg_dir) + return -1; + c++; +#ifdef K_DEBUG + printk("old_page_dir %p nr %d old pid %d\n", old_pg_dir, mem_map[MAP_NR(old_pg_dir)], current->pid); + printk("new_page_dir %p nr %d new pid %d\n", new_pg_dir, mem_map[MAP_NR(new_pg_dir)], tsk->pid); +#endif + tsk->tss.cr3 = new_pg_dir; + old_page_dir = (unsigned long *) old_pg_dir; + new_page_dir = (unsigned long *) new_pg_dir; + for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) { + int j; + unsigned long old_pg_table, *old_page_table; + unsigned long new_pg_table, *new_page_table; + + old_pg_table = *old_page_dir; + if (!old_pg_table) + continue; + if (old_pg_table >= HIGH_MEMORY || !(1 & old_pg_table)) { + printk("copy_page_tables: bad page table: " + "probable memory corruption %d %p\n", i, old_pg_table); + *old_page_dir = 0; continue; } - - /* - * 獲取源頁表地址 - */ - from_page_table = (unsigned long *) (0xfffff000 & *from_dir); - /* - * 獲取一個空閒頁面作為目的頁表 - */ - if (!(to_page_table = (unsigned long *) get_free_page())) { - return -1; /* Out of memory, see freeing */ + if (mem_map[MAP_NR(old_pg_table)] & USED) { +#ifdef K_DEBUG + printk("%s-%d i %d new_page_dir is %p old_pg_table is %p kernel reserve\n", __func__, __LINE__, i, new_page_dir, old_pg_table); +#endif + *new_page_dir = old_pg_table; + continue; } - - /* 設定目的頁表目錄的地址為新申請的物理頁表地址並設定屬性 - * 設定頁表,並將此也變的後3位設定為111,表示(usr, RW, preset) - * - */ - *to_dir = ((unsigned long) to_page_table) | 7; - - /* from == 0表示是核心空間的頁表目錄項的起始地址 - * 如果是核心則只需拷貝160個頁,640KB的空間 - * 640KB也是定義INIT_TASK的資料段和程式碼段長度 - * 如果是其他也就是非任務0,則拷貝1024個頁,共4MB空間 - * - */ - //nr = (from == 0) ? 
0xA0 :1 024; - if (from) { - nr = 1024; - } else { - nr = 160; +#ifdef K_DEBUG + printk("%s-%d i %d new_page_dir is %p old_pg_table is %p\n", __func__, __LINE__, i, new_page_dir, old_pg_table); +#endif + new_pg_table = get_free_page(); + if (!new_pg_table) { + free_page_tables(tsk); + return -1; } - - for ( ; nr-- > 0; from_page_table++, to_page_table++) { - /* - * 將當前源頁表項存到臨時變數this_page - * - */ - this_page = *from_page_table; - /* - * 如果當前的頁表項不存在則不用拷貝 - */ - if (!(1 & this_page)) { + c++; + *new_page_dir = new_pg_table | PAGE_ACCESSED | 7; + old_page_table = (unsigned long *) (0xfffff000 & old_pg_table); + new_page_table = (unsigned long *) (0xfffff000 & new_pg_table); + for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) { + unsigned long pg; + pg = *old_page_table; + if (!pg) + continue; + if (!(pg & PAGE_PRESENT)) { continue; } - /* - * 設定目的頁表為只讀,我們看到目的頁表為只讀 - * - */ - this_page &= ~2; - /* - * 將臨時頁表項內容賦值給目的頁表項 - * 通過此程式碼我們看到, - * 系統只是做了頁表的設定並沒有實現真正的資料拷貝 - * - */ - *to_page_table = this_page; - /* - * 對於是1MB以下的記憶體,說明是核心頁面因此不需要對mem_map進行設定 - * - * 如果程式碼是在任務0中建立任務1,則下面的程式碼不會用到 - * 只有當呼叫者的程式碼處於主記憶體中(大於LOW_MEN)1MB時才會執行 - * 這種情況需要在程序呼叫了execve()裝載並執行了新程式碼才會出現 - * - * - */ - if (this_page > LOW_MEM) { - /* - * 下面的內容是使其源頁表項也可讀,這樣哪個程序先寫會觸發 - * 缺頁異常從而分配頁進行使用 - * - * - */ - *from_page_table = this_page; - this_page -= LOW_MEM; - this_page >>= 12; - mem_map[this_page]++; - } + pg &= ~2; + *new_page_table = pg; + if (mem_map[MAP_NR(pg)] & USED) + continue; + *old_page_table = pg; + mem_map[MAP_NR(pg)]++; } } invalidate(); +#ifdef K_DEBUG + printk("%s-%d get free pages number %d\n", __func__, __LINE__, c); +#endif return 0; } -/* - * This function puts a page in memory at the wanted address. - * It returns the physical address of the page gotten, 0 if - * out of memory (either when trying to access page-table or - * page.) 
- * - * 下面的程式碼將一個物理頁放在制定的address處,返回實體地址 - */ -unsigned long put_page(unsigned long page,unsigned long address) +unsigned long put_page(unsigned long page, unsigned long address) { unsigned long tmp, *page_table; + struct task_struct *tsk = current; - /* NOTE !!! This uses the fact that _pg_dir=0 */ +/* NOTE !!! This uses the fact that _pg_dir=0 */ - /* chenwg - * 如果頁小於1M或者大於HIGH_MEMORY則告警 - * 如果所在的頁的對映沒有置位則告警 - * - */ - if (page < LOW_MEM || page >= HIGH_MEMORY) { - printk("Trying to put page %p at %p\n",page,address); - } - if (mem_map[(page-LOW_MEM)>>12] != 1) { + if (page >= HIGH_MEMORY) + printk("put_dirty_page: trying to put page %p at %p\n",page,address); + + if (mem_map[MAP_NR(page)] != 1) printk("mem_map disagrees with %p at %p\n",page,address); - } + + page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc)); +#ifdef K_DEBUG + printk("%s-%d page_table is %p page is %p address %p pid is %d\n", __func__, __LINE__, page_table, page, address, current->pid); +#endif - /* chenwg - * 根據地址計算頁表目錄項指標,在核心態中,頁表目錄從0地址開始,我們知道 - * 在啟用了CPU的頁式記憶體管理後,虛擬地址右移22位得到頁表目錄的索引號 - * 又因為每個頁表目錄佔用4個位元組因此將索引左移2位得到頁表目錄的地址, - * 頁表目錄的地址存放是頁表的地址,每個頁表要求4KB對其,每個頁也要求4KB對齊 - * 因此不管是頁表目錄存放的頁表的地址還是頁表裡存放頁的地址都低12位無效 - * 所有的計算都已頁表在0地址存放為基礎 - * - */ - page_table = (unsigned long *) ((address>>20) & 0xffc); - - /* chenwg - * *page_table獲取頁表的地址,其bit0表示該頁表是否有效 - * 如果有效獲取頁表地址 - * 否則獲取一個頁作為頁表 - * - */ - if ((*page_table)&1) { + if ((*page_table)&1) page_table = (unsigned long *) (0xfffff000 & *page_table); - } else { - if (!(tmp=get_free_page())) { + else { + if (!(tmp=get_free_page())) return 0; - } - *page_table = tmp|7; + *page_table = tmp | PAGE_ACCESSED |7; page_table = (unsigned long *) tmp; } - /* chenwg - * address>>12表示這個虛擬地址在頁表中的偏移 - * 在頁表的制定位置寫上實體地址並設定此頁有效 - * - */ - page_table[(address>>12) & 0x3ff] = page | 7; - /* - * 頁表發生變化了,不明白為什麼不用重新整理 - */ - /* no need for invalidate */ + page_table += (address >> PAGE_SHIFT) & 0x3ff; + if (*page_table) { + printk("put_dirty_page: page 
already exists\n"); + *page_table = 0; + invalidate(); + } + *page_table = page | (PAGE_DIRTY | PAGE_ACCESSED | 7); +/* no need for invalidate */ return page; } + + /* * table_entry頁表項指標 */ @@ -436,19 +344,26 @@ void un_wp_page(unsigned long * table_entry) unsigned long old_page,new_page; - /* 獲取此頁對應的實體地址 + /* 獲取此頁對應的實體地址,如果原頁面不是保留並且其值為1表示沒有共享,設定寫標記 * */ old_page = 0xfffff000 & *table_entry; - if (old_page >= LOW_MEM && mem_map[MAP_NR(old_page)]==1) { + + if (!(mem_map[MAP_NR(old_page)] & USED) && mem_map[MAP_NR(old_page)]==1) { *table_entry |= 2; invalidate(); return; } if (!(new_page=get_free_page())) oom(); - if (old_page >= LOW_MEM) + + /* + * 如果原頁面不是保留且不為1,表示已經被共享,給其值減一,設定新的頁表 + * + */ + if (!(mem_map[MAP_NR(old_page)] & USED)) mem_map[MAP_NR(old_page)]--; + *table_entry = new_page | 7; invalidate(); copy_page(old_page,new_page); @@ -468,18 +383,47 @@ void do_wp_page(unsigned long error_code,unsigned long address) /* stupid, stupid. I really want the libc.a from GNU */ if (CODE_SPACE(address)) do_exit(SIGSEGV); + un_wp_page((unsigned long *)address); + (((address>>10) & 0xffc) + (0xfffff000 & + *((unsigned long *) ((address>>20) &0xffc))))); #endif + + unsigned long* dir_base = (unsigned long *)current->tss.cr3; + unsigned long* dir_item = dir_base + (address >> 22); un_wp_page((unsigned long *) - (((address>>10) & 0xffc) + (0xfffff000 & - *((unsigned long *) ((address>>20) &0xffc))))); + (((address>>10) & 0xffc) + (0xfffff000 & *dir_item))); +#if 0 + unsigned long address; + + printk("%s address is %p start is %p pid %d\n", __func__, __address, current->start_code, current->pid); + /* + * 頁表地址 + */ + address = current->tss.cr3 + ((__address>>20) & 0xffc); + /* + * 頁表地址 + */ + address = address & 0xfffff000; + /* + * 頁在頁表的地址 + */ + address = address + (__address>>10) & 0xffc; + /* + * 頁的實體地址 + */ + address = *(unsigned long *)(address); + un_wp_page((unsigned long *)address); +#endif } + void write_verify(unsigned long address) { unsigned long page; - if (!( 
(page = *((unsigned long *) ((address>>20) & 0xffc)) )&1)) + page = *(unsigned long *) (current->tss.cr3 + ((address>>20) & 0xffc)); + if (!(page & PAGE_PRESENT)) return; page &= 0xfffff000; page += ((address>>10) & 0xffc); @@ -508,34 +452,37 @@ void get_empty_page(unsigned long address) */ static int try_to_share(unsigned long address, struct task_struct * p) { + struct task_struct *tsk = current; + unsigned long from; unsigned long to; unsigned long from_page; unsigned long to_page; unsigned long phys_addr; - from_page = to_page = ((address>>20) & 0xffc); - from_page += ((p->start_code>>20) & 0xffc); - to_page += ((current->start_code>>20) & 0xffc); -/* is there a page-directory at from? */ + from_page = p->tss.cr3 + ((address>>20) & 0xffc); + to_page = tsk->tss.cr3 + ((address>>20) & 0xffc); + /* is there a page-directory at from? */ from = *(unsigned long *) from_page; if (!(from & 1)) return 0; from &= 0xfffff000; from_page = from + ((address>>10) & 0xffc); phys_addr = *(unsigned long *) from_page; -/* is the page clean and present? */ + /* is the page clean and present? 
*/ if ((phys_addr & 0x41) != 0x01) return 0; phys_addr &= 0xfffff000; - if (phys_addr >= HIGH_MEMORY || phys_addr < LOW_MEM) + if (phys_addr >= HIGH_MEMORY) + return 0; + if (mem_map[MAP_NR(phys_addr)] & USED) return 0; to = *(unsigned long *) to_page; if (!(to & 1)) { - if ((to = get_free_page())) - *(unsigned long *) to_page = to | 7; - else - oom(); + to = get_free_page(); + if (!to) + return 0; + *(unsigned long *) to_page = to | PAGE_ACCESSED | 7; } to &= 0xfffff000; to_page = to + ((address>>10) & 0xffc); @@ -545,10 +492,10 @@ static int try_to_share(unsigned long address, struct task_struct * p) *(unsigned long *) from_page &= ~2; *(unsigned long *) to_page = *(unsigned long *) from_page; invalidate(); - phys_addr -= LOW_MEM; - phys_addr >>= 12; + phys_addr >>= PAGE_SHIFT; mem_map[phys_addr]++; return 1; + } /* @@ -567,6 +514,7 @@ static int share_page(unsigned long address) return 0; if (current->executable->i_count < 2) return 0; + for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) { if (!*p) continue; @@ -587,8 +535,31 @@ void do_no_page(unsigned long error_code,unsigned long address) unsigned long page; int block,i; + address &= 0xfffff000; tmp = address - current->start_code; + +#ifdef K_DEBUG + printk("%s address is %p start is %p pid %d\n", __func__, address, current->start_code, current->pid); + if (current->pid == 1) { + unsigned long dir = current->tss.cr3; + unsigned long *page_dir; + unsigned long page_table; + + page_table = (dir + ((address>>20) & 0xffc)); + page_dir = (unsigned long *) dir; + for (i = 0 ; i < 1024 ; i++,page_dir++) { + page_table = *page_dir; + if (!page_table) + continue; + if (!(1 & page_table)) { + continue; + } + printk("%s xxxxxxxxxxx page_table reserve %p\n", __func__, page_table); + printk("%s xxxxxxxxxxx page reserve %p\n", __func__, (*(unsigned long*)(page_table&0xfffff000))); + } + } +#endif if (!current->executable || tmp >= current->end_data) { get_empty_page(address); return; @@ -597,7 +568,8 @@ void do_no_page(unsigned 
long error_code,unsigned long address) return; if (!(page = get_free_page())) oom(); -/* remember that 1 block is used for header */ + + /* remember that 1 block is used for header */ block = 1 + tmp/BLOCK_SIZE; for (i=0 ; i<4 ; block++,i++) nr[i] = bmap(current->executable,block); @@ -609,11 +581,12 @@ void do_no_page(unsigned long error_code,unsigned long address) *(char *)tmp = 0; } if (put_page(page,address)) - return; + return; free_page(page); oom(); } + long get_total_pages(void) { return total_pages; @@ -659,21 +632,26 @@ void mem_init(long start_mem, long end_mem) mem_map[i++]=0; } -void calc_mem(void) +void show_mem(void) { - int i,j,k,free=0; - long * pg_tbl; - - for(i=0 ; i<PAGING_PAGES ; i++) - if (!mem_map[i]) free++; - printk("%d pages free (of %d)\n\r", free, PAGING_PAGES); - for(i=2 ; i<1024 ; i++) { - if (1&pg_dir[i]) { - pg_tbl=(long *) (0xfffff000 & pg_dir[i]); - for(j=k=0 ; j<1024 ; j++) - if (pg_tbl[j]&1) - k++; - printk("Pg-dir[%d] uses %d pages\n",i,k); - } + int i,free = 0,total = 0,reserved = 0; + int shared = 0; + + printk("Mem-info:\n"); + + i = HIGH_MEMORY >> PAGE_SHIFT; + while (i-- > 0) { + total++; + if (mem_map[i] & USED) + reserved++; + else if (!mem_map[i]) + free++; + else + shared += mem_map[i]-1; } + printk("Buffer blocks: %6dMB\n", (nr_buffers*BLOCK_SIZE)/(1024*1024)); + printk("Tatal pages: %6dMB\n", (total*PAGE_SIZE)/(1024*1024)); + printk("Free pages: %6dMB\n", (free*PAGE_SIZE)/(1024*1024)); + printk("Reserved pages: %6dMB\n", (reserved*PAGE_SIZE)/(1024*1024)); + printk("Shared pages: %6dMB\n", (shared*PAGE_SIZE)/(1024*1024)); }