1. 程式人生 > >linux實體記憶體探測

linux實體記憶體探測

linux被bootloader載入到記憶體後,cpu最初執行的linux核心程式碼是header.S檔案中的start_of_setup。這段程式碼在做了一些準備工作後,會跳轉到boot目錄下main.c檔案的main函式執行。在這個main函式中,我們第一次看到與記憶體管理相關的程式碼:它呼叫detect_memory()函式探測系統實體記憶體。

在header.S中執行下面彙編程式碼:

# Setup entry label in header.S; the lines shown as "....." are elided in this excerpt.
start_of_setup:
       .....
# Jump to C code (should not return)
	calll	main
       .....
跳到boot目錄下的main.c檔案中
/* C entry point called from start_of_setup (excerpt; "......" marks elided lines). */
void main(void)
{
        ......
	/* Detect memory layout */
	detect_memory();/* memory-probing routine */
	......
}

/*
 * Probe the physical memory layout with each BIOS method in turn.
 *
 * All three probes always run, in the order E820, E801, 88; a later
 * probe is not skipped when an earlier one has already succeeded.
 *
 * Returns 0 if at least one probe reported success, -1 otherwise.
 */
int detect_memory(void)
{
	int succeeded = 0;

	/* E820 returns an entry count; any positive count is a success. */
	succeeded |= (detect_memory_e820() > 0);

	/* For E801 and 88 a zero return is treated as success. */
	succeeded |= (detect_memory_e801() == 0);
	succeeded |= (detect_memory_88() == 0);

	return succeeded ? 0 : -1;
}
由上面的程式碼可知,linux核心會依次呼叫detect_memory_e820()、detect_memory_e801()、detect_memory_88()來獲得系統實體記憶體佈局;不論前一個是否成功,這3個函式都會被呼叫,只要其中一個成功,detect_memory()就返回0。這3個函式內部都是透過bios中斷取得記憶體資訊(下面的detect_memory_e820()是用intcall()發出中斷),該中斷呼叫形式為int 0x15,呼叫前分別在AX(或AH)暫存器中放入功能號0xe820、0xe801、0x88(十六進位)。關於0x15號中斷,有興趣的可以去查詢相關手冊。下面分析detect_memory_e820()的程式碼,其它程式碼基本一樣。
#define SMAP	0x534d4150	/* ASCII "SMAP" */
/*
 * For historical reasons some I/O devices also occupy part of the
 * physical address space, so the physical memory the system can use is
 * not contiguous: it is split into many ranges, and the ranges do not
 * all have the same attributes.  Each int 0x15 query returns information
 * about a single range, so obtaining the whole layout requires querying
 * iteratively.
 *
 * detect_memory_e820() therefore issues int 0x15 inside a do-while loop.
 * Each range obtained is stored as a struct e820entry, whose layout is
 * the layout of the result returned by the e820 call.  Like other results
 * gathered at boot time, the entries end up in boot_params, specifically
 * in boot_params.e820_map.
 *
 * Returns the number of entries stored (also written to
 * boot_params.e820_entries); 0 means nothing usable was obtained.
 */
static int detect_memory_e820(void)
{
	int count = 0;/* number of memory ranges recorded so far */
	struct biosregs ireg, oreg;
	struct e820entry *desc = boot_params.e820_map;
	static struct e820entry buf; /* static so it is zeroed */

	initregs(&ireg);/* initialise the input register set */
	ireg.ax  = 0xe820;
	ireg.cx  = sizeof buf;/* size of one struct e820entry */
	ireg.edx = SMAP;/* signature */
	ireg.di  = (size_t)&buf;/* where the int 0x15 call stores its result */

	/*
	 * Note: at least one BIOS is known which assumes that the
	 * buffer pointed to by one e820 call is the same one as
	 * the previous call, and only changes modified fields.  Therefore,
	 * we use a temporary buffer and copy the results entry by entry.
	 *
	 * This routine deliberately does not try to account for
	 * ACPI 3+ extended attributes.  This is because there are
	 * BIOSes in the field which report zero for the valid bit for
	 * all ranges, and we don't currently make any use of the
	 * other attribute bits.  Revisit this if we see the extended
	 * attribute bits deployed in a meaningful way in the future.
	 */

	do {
		/*
		 * Inputs passed in ireg on every call:
		 *   ax  = 0xe820
		 *   edx = SMAP signature
		 *   di  = address of buf
		 *   cx  = sizeof buf
		 *   ebx = value returned in ebx by the previous call
		 *         (first call: whatever initregs() left there,
		 *         presumably 0 - TODO confirm)
		 *
		 * Outputs examined in oreg after the call:
		 *   eflags - carry flag (CF) ends the loop
		 *   eax    - must equal SMAP, otherwise the map is discarded
		 *   ebx    - carried over as input for the next call
		 * The range itself is expected in buf after the call.
		 */
		intcall(0x15, &ireg, &oreg);
		/* select the next range */
		ireg.ebx = oreg.ebx; /* for next iteration... */

		/* BIOSes which terminate the chain with CF = 1 as opposed
		   to %ebx = 0 don't always report the SMAP signature on
		   the final, failing, probe. */
		if (oreg.eflags & X86_EFLAGS_CF)
			break;

		/* Some BIOSes stop returning SMAP in the middle of
		   the search loop.  We don't know exactly how the BIOS
		   screwed up the map at that point, we might have a
		   partial map, the full map, or complete garbage, so
		   just return failure. */
		if (oreg.eax != SMAP) {
			count = 0;
			break;
		}

		*desc++ = buf;/* copy buf into the current e820_map slot, then advance */
		count++;/* one more range recorded */
	} 
	while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
	/* store the number of ranges in boot_params and return it */
	return boot_params.e820_entries = count;
}
其中,存放中斷返回值的結構如下:
/*
 * One memory range, in the form detect_memory_e820() copies into
 * boot_params.e820_map.  Declared packed, so the compiler inserts no
 * padding between or after the fields.
 */
struct e820entry {
	__u64 addr;	/* start of memory segment */
	__u64 size;	/* size of memory segment */
	__u32 type;	/* type of memory segment */
} __attribute__((packed));
在核心初始化跳入start_kernel函式後執行以下初始化

start_kernel()->setup_arch()->setup_memory_map()

/*
 * Build the e820 memory map through the x86_init.resources.memory_setup()
 * hook, keep a copy of the resulting map in e820_saved, and print the map.
 */
void __init setup_memory_map(void)
{
	/* Run the x86 memory_setup hook; it returns the label passed to the printer. */
	char *source = x86_init.resources.memory_setup();

	/* Snapshot the map as it stands right after setup. */
	memcpy(&e820_saved, &e820, sizeof(struct e820map));

	/* Print the banner followed by the map itself. */
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(source);
}

x86_init.c中定義了x86下的memory_setup函式

/* x86 init hook table (excerpt; "……" marks members elided from this listing). */
struct x86_init_ops x86_init __initdata = {

	.resources = {
		……
		/* hook invoked as x86_init.resources.memory_setup() */
		.memory_setup		= default_machine_specific_memory_setup,
	},
        ……
};
/*
 * Sanitize the e820 map held in boot_params and append it to the
 * kernel's e820 map.
 *
 * Returns who, which is initialised to "BIOS-e820"; the error branch is
 * elided in this excerpt ("……") and may change it.
 */
char *__init default_machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	u32 new_nr;
	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	new_nr = boot_params.e820_entries;
	/* remove overlapping ranges */
	sanitize_e820_map(boot_params.e820_map,
			ARRAY_SIZE(boot_params.e820_map),
			&new_nr);
	/* number of entries left once the overlaps have been removed */
	boot_params.e820_entries = new_nr;
	/* copy the entries into the global e820 map; a negative return selects the error path */
	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
	  < 0) {
		……
	}

	/* In case someone cares... */
	return who;
}

append_e820_map呼叫__append_e820_map實現

/*
 * Add nr_map entries, starting at biosmap, to the global e820 map via
 * e820_add_region().
 *
 * Returns 0 once every entry has been added.  Returns -1 as soon as an
 * entry is met whose start + size wraps around 64 bits; entries that
 * came before it have already been added at that point.
 */
static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
{
	struct e820entry *entry = biosmap;
	int remaining = nr_map;

	for (; remaining; entry++, remaining--) {
		u64 first = entry->addr;
		u64 length = entry->size;
		u64 last = first + length;
		u32 kind = entry->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (last < first)
			return -1;

		/* Hand the range over to the global map. */
		e820_add_region(first, length, kind);
	}

	return 0;
}

/*
 * Add one region to the global e820 map: forwards start, size and type
 * to __e820_add_region() with &e820 as the target map.
 */
void __init e820_add_region(u64 start, u64 size, int type)
{
	__e820_add_region(&e820, start, size, type);
}

全域性變數e820的型別是struct e820map,其結構如下:

/* A table of e820 entries together with an entry counter. */
struct e820map {
	__u32 nr_map;	/* presumably the number of entries in use in map[] - confirm against users */
	struct e820entry map[E820_X_MAX];	/* entry storage, E820_X_MAX slots */
};

其中E820_X_MAX大小為128.

/*
 * Append one region to the map pointed to by e820x.
 *
 * @e820x: map to extend; nr_map is used as the index of the next free slot
 * @start: start address of the region
 * @size:  size of the region
 * @type:  type of the region
 *
 * If the map is already full, an error is printed and the region is
 * dropped; the map is left untouched in that case.
 */
static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
					 int type)
{
	int x = e820x->nr_map;

	/* No free slot left: report it and drop the region. */
	if (x >= ARRAY_SIZE(e820x->map)) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	/* Fill the next free slot, then account for it. */
	e820x->map[x].addr = start;
	e820x->map[x].size = size;
	e820x->map[x].type = type;
	e820x->nr_map++;
}
到這裡,實體記憶體資訊就已經從BIOS中讀出來,並存放到全域性變數e820中。e820是linux核心中用於建立記憶體管理框架的基礎,在後面我們會看到,建立初始化節點、管理區時都會用到它。