lua5.3.1 原始碼閱讀記錄(基礎)
通用資料結構:Tvalue
/*
** Basic payload shared by all Lua values: a union whose members overlap,
** one member per kind of value listed below.
*/
union Value {
  GCObject *gc; /* collectable objects */
  void *p; /* light userdata */
  int b; /* booleans */
  lua_CFunction f; /* light C functions */
  lua_Integer i; /* integer numbers */
  lua_Number n; /* float numbers */
};
/*
** Tagged Values. This is the basic representation of values in Lua.
** The macro below expands to two members: a Value (the union above) and
** an int type tag; the two together make up one basic Lua value.
*/
#define TValuefields Value value_; int tt_
struct lua_TValue {
  TValuefields;
};
typedef struct lua_TValue TValue;
上面的定義可以看成,TValue將是lua中的基本資料結構;
基本型別定義:basic types
下面是最基本的型別定義巨集:
/*
** basic types
** Integer tags, one per basic type: LUA_TNONE is -1 and the remaining
** tags are numbered consecutively from 0 (LUA_TNIL) to 8 (LUA_TTHREAD).
*/
#define LUA_TNONE (-1)
#define LUA_TNIL 0
#define LUA_TBOOLEAN 1
#define LUA_TLIGHTUSERDATA 2
#define LUA_TNUMBER 3
#define LUA_TSTRING 4
#define LUA_TTABLE 5
#define LUA_TFUNCTION 6
#define LUA_TUSERDATA 7
#define LUA_TTHREAD 8
函式在lua中是第一類值(first-class value), 其型別標記定義如下:
高位代表型別的變體,低位代表型別
/*
** LUA_TFUNCTION variants:
** 0 - Lua function
** 1 - light C function
** 2 - regular C function (closure)
*/
/*
** Variant tags for functions: each tag is LUA_TFUNCTION OR-ed with the
** variant number shifted left by 4 bits.
*/
#define LUA_TLCL (LUA_TFUNCTION | (0 << 4)) /* Lua closure */
#define LUA_TLCF (LUA_TFUNCTION | (1 << 4)) /* light C function */
#define LUA_TCCL (LUA_TFUNCTION | (2 << 4)) /* C closure */
Table資料結構
lua的雜湊表有一個高效的實現, 幾乎可以認為操作雜湊表的時間複雜度為常數;下面是lua原始碼中對table的介紹:
/*
** Implementation of tables (aka arrays, objects, or hash tables).
** Tables keep its elements in two parts: an array part and a hash part.
** Non-negative integer keys are all candidates to be kept in the array
** part. The actual size of the array is the largest 'n' such that
** more than half the slots between 1 and n are in use.
** Hash uses a mix of chained scatter table with Brent's variation.
** A main invariant of these tables is that, if an element is not
** in its main position (i.e. the 'original' position that its hash gives
** to it), then the colliding element is in its own main position.
** Hence even when the load factor reaches 100%, performance remains good.
*/
下面就是其論文 "The Implementation of Lua 5.0" 中給出的table結構示意圖:
總體意思就是: 以整數為鍵的pair優先儲存在陣列部分中, table根據內容自動並且動態地對這兩部分進行適當的分配; 圖中以string為鍵的項則儲存在hash部分中;
table的資料結構如下:
/*
** Key of a hash-part node. The union lets the same storage be read either
** as a plain TValue ('tvk') or as the TValue fields followed by the
** chaining offset ('nk').
*/
typedef union TKey {
  struct {
    TValuefields;
    int next; /* for chaining (offset for next node) */
  } nk;
  TValue tvk;
} TKey;
/* Node of a hash chain: one value together with its key. */
typedef struct Node {
  TValue i_val;
  TKey i_key;
} Node;
/*
** A table has two parts: the hash part ('node', whose size is stored as a
** log2 in 'lsizenode') and the array part ('array', 'sizearray' slots).
*/
typedef struct Table {
  CommonHeader; /* common header fields */
  lu_byte flags; /* 1<<p means tagmethod(p) is not present */
  lu_byte lsizenode; /* log2 of size of 'node' array */
  unsigned int sizearray; /* size of 'array' array */
  TValue *array; /* array part */
  Node *node;
  Node *lastfree; /* any free position is before this position */
  struct Table *metatable;
  GCObject *gclist;
} Table;
table讀取
論文說明:
即非負整數鍵都有可能儲存在array部分; hash部分使用的是鏈狀發散表(chained scatter table)結合Brent變體(Brent's variation)的結構; (鏈狀發散表是指衝突的節點不另外分配連結串列, 而是存放在同一個node陣列的空閒位置, 並通過next偏移量串成一條鏈)
表讀取函式如下, 會根據具體不同的型別呼叫不同的雜湊查詢方法,比如int則是優先在array中查詢:
/*
** Generic lookup: picks a specialised search according to the key's type
** and otherwise walks the collision chain that starts at the key's main
** position. Returns the value slot, or luaO_nilobject when the key is
** absent.
*/
const TValue *luaH_get (Table *t, const TValue *key) {
  Node *node;
  int step;
  switch (ttype(key)) {
    case LUA_TNIL:
      return luaO_nilobject;
    case LUA_TSHRSTR:
      return luaH_getstr(t, tsvalue(key));
    case LUA_TNUMINT:
      return luaH_getint(t, ivalue(key));
    case LUA_TNUMFLT: {
      lua_Integer as_int;
      if (luaV_tointeger(key, &as_int, 0))  /* index is int? */
        return luaH_getint(t, as_int);  /* use specialized version */
      break;  /* otherwise use the generic search below */
    }
    default:
      break;
  }
  /* generic search: follow the 'next' offsets until the chain ends */
  node = mainposition(t, key);
  do {
    if (luaV_rawequalobj(gkey(node), key))
      return gval(node);  /* found */
    step = gnext(node);
    node += step;
  } while (step != 0);
  return luaO_nilobject;
}
這裡有分short string, int, nil, double幾種查詢,如下面是short string:
/*
** search function for short strings
** Walks the chain that starts at the node selected by the string's hash and
** returns the value slot of the matching key, or luaO_nilobject when the
** chain ends without a match.
*/
const TValue *luaH_getshortstr (Table *t, TString *key) {
  Node *n = hashstr(t, key);  /* head node of the chain, found from the key */
  lua_assert(key->tt == LUA_TSHRSTR);
  for (;;) {  /* check whether 'key' is somewhere in the chain */
    const TValue *k = gkey(n);
    if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
      return gval(n);  /* that's it */
    else {
      int nx = gnext(n);
      if (nx == 0)
        break;  /* end of chain */
      n += nx;  /* 'next' is an offset relative to the current node */
    }
  }
  return luaO_nilobject;
}
下面是int的獲取方式, 可以看出, 當超出陣列範圍時就會查詢hash表:
/*
** Lookup by integer key. Keys inside the array part are answered directly
** from 'array'; every other key is searched for in the hash part. Returns
** the value slot, or luaO_nilobject when the key is absent.
*/
const TValue *luaH_getint (Table *t, lua_Integer key) {
  Node *node;
  int step;
  /* (1 <= key && key <= t->sizearray) */
  if (l_castS2U(key - 1) < t->sizearray)
    return &t->array[key - 1];
  /* outside the array part: walk the chain in the hash part */
  node = hashint(t, key);
  do {
    if (ttisinteger(gkey(node)) && ivalue(gkey(node)) == key)
      return gval(node);  /* found */
    step = gnext(node);
    node += step;
  } while (step != 0);
  return luaO_nilobject;
}
在論文中經常提到main position(主位置), 指的是鍵的雜湊值在hash部分node陣列中對應的位置, 也就是該鍵所在衝突鏈的頭節點;
/*
** returns the 'main' position of an element in a table (that is, the index
** of its hash value)
*/
static Node *mainposition (const Table *t, const TValue *key) {
  /*...*/
}
table寫入
/*
** Returns the slot where the value for 'key' is to be stored: the existing
** slot when the key is already present, otherwise a slot created by
** luaH_newkey.
** Beware: callers probably need to check a GC barrier and invalidate the
** TM cache.
*/
TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
  const TValue *slot = luaH_get(t, key);
  return (slot == luaO_nilobject) ? luaH_newkey(L, t, key)
                                  : cast(TValue *, slot);
}
重點在luaH_newkey函式裡,
/*
** inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
**
** Reading note: in other words, when the node occupying the main position
** is not in its own main position, it is relocated to a free slot and the
** new key takes the main position; otherwise the new key itself is written
** to a free slot. (The author marks this part as not fully understood.)
*/
TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
  Node *mp;
  TValue aux;
  if (ttisnil(key)) luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {
    lua_Integer k;
    if (luaV_tointeger(key, &k, 0)) {  /* index is int? */
      /* float key converted to an integer key */
      setivalue(&aux, k);
      key = &aux;  /* insert it as an integer */
    }
    else if (luai_numisnan(fltvalue(key)))
      luaG_runerror(L, "table index is NaN");
  }
  mp = mainposition(t, key);
  if (!ttisnil(gval(mp)) || isdummy(mp)) {  /* main position is taken? */
    Node *othern;
    /* get a free place (reading note: located through the 'lastfree' field) */
    Node *f = getfreepos(t);
    if (f == NULL) {  /* cannot find a free place? */
      rehash(L, t, key);  /* grow table (see rehash) */
      /* whatever called 'newkey' takes care of TM cache and GC barrier */
      return luaH_set(L, t, key);  /* insert key into grown table */
    }
    lua_assert(!isdummy(f));
    othern = mainposition(t, gkey(mp));
    /* ....... rest of the collision handling is elided in these notes */
  }
  /* ....... rest of the function is elided in these notes */
}
看rehash過程:
/*
** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
**
** Counts the keys currently in the table plus the extra key 'ek', computes
** a new size for the array part and resizes both parts accordingly.
*/
static void rehash (lua_State *L, Table *t, const TValue *ek) {
  unsigned int asize;  /* optimal size for array part */
  unsigned int na;  /* number of keys in the array part */
  unsigned int nums[MAXABITS + 1];
  int i;
  int totaluse;
  for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
  /*
  ** Reading note: the keys are tallied into nums[], one bucket per range
  ** (2^(i-1), 2^i]. Not every integer key ends up in the array part; the
  ** array is sized so that more than half of its slots are in use.
  */
  na = numusearray(t, nums);  /* count keys in array part */
  totaluse = na;  /* all those keys are integer keys */
  totaluse += numusehash(t, nums, &na);  /* count keys in hash part */
  /* count extra key */
  na += countint(ek, nums);
  totaluse++;
  /* compute new size for array part */
  /* reading note: this call is what keeps more than half of the array used */
  asize = computesizes(nums, &na);
  /* resize the table to new computed sizes */
  luaH_resize(L, t, asize, totaluse - na);
}
// 注: rehash會重新計算兩部分的大小, 陣列部分和hash部分都可能增大或縮小(luaH_resize中有處理陣列部分縮小的分支)
TString
短字串(即內部化字串)存放於全域性的字串hash表裡, 存入新的短字串時也可能需要將這個雜湊表擴大;
/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
** In other words, this struct is only the header of a string; the
** character data comes right after it.
*/
typedef struct TString {
  CommonHeader;
  lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
  lu_byte shrlen;  /* length for short strings */
  unsigned int hash;
  union {
    size_t lnglen;  /* length for long strings */
    struct TString *hnext;  /* linked list for hash table */
  } u;
} TString;
UserData
儲存形式上和字串相同(資料緊跟在頭部之後), 但不追加結尾的'\0'
/*
** Header for userdata; memory area follows the end of this structure
** (aligned according to 'UUdata'; see next).
*/
typedef struct Udata {
  CommonHeader;
  lu_byte ttuv_;  /* user value's tag */
  struct Table *metatable;
  size_t len;  /* number of bytes */
  union Value user_;  /* user value */
} Udata;
棧和呼叫鏈
lua執行緒資料結構如下, 每個執行緒裡都有一個指向全域性的共享lua狀態:
/*
** 'per thread' state
** Only the fields discussed in these notes are listed; the elision markers
** stand for fields that were left out.
*/
struct lua_State {
  CommonHeader;
  lu_byte status;
  StkId top;  /* first free slot in the stack */
  global_State *l_G;  /* global state shared by all threads (the actual Lua VM) */
  /**....**/
  StkId stack_last;  /* last free slot in the stack */
  StkId stack;  /* stack base */
  UpVal *openupval;  /* list of open upvalues in this stack */
  GCObject *gclist;  /* used by the garbage collector */
  /**....**/
};
所有的lua C API都是圍繞lua_State這個狀態機來改變狀態的, 並且各自獨立地在所屬執行緒的棧裡操作;
而全域性共享的真正虛擬機器是如下說明的:
/*
** 'global state', shared by all threads of this state
** (all fields are elided in these notes)
*/
typedef struct global_State {
  /**.....**/
} global_State;
狀態機的棧資訊資料結構StkId
看到下面的定義可以知道, StkId是指向TValue的指標, 而TValue是由TValuefields巨集定義的結構, 該結構包含Value value_;int tt_
兩部分, value_是聯合值,Value型別, tt_是說明聯合物件的型別; 由上面Value的結構可知, 它是一個由{可回收物件指標; void*的light userdata; booleans; light C functions; integer; number} 這些型別組成的聯合型別, 所以需要一個tt_來說明當前的TValue到底是什麼型別;
typedef TValue *StkId;  /* index to stack elements */

/* stack size: the number of slots stack_init allocates for a new stack */
#define BASIC_STACK_SIZE        (2*LUA_MINSTACK)

/* minimum Lua stack available to a C function */
#define LUA_MINSTACK    20
棧的初始化
資料棧和呼叫棧都屬於lua的執行緒, 同一個虛擬機器中不同執行緒共享了global_State;
// 棧這邊程式碼的還沒有仔細研究 static void stack_init (lua_State *L1, lua_State *L) { int i; CallInfo *ci; // CallInfo是當前函式的呼叫棧, 以雙向連結串列的形式存在與執行緒物件裡 /* initialize stack array */ L1->stack = luaM_newvector(L, BASIC_STACK_SIZE, TValue); // 初始化長度 L1->stacksize = BASIC_STACK_SIZE; for (i = 0; i < BASIC_STACK_SIZE; i++) setnilvalue(L1->stack + i); /* erase new stack */ L1->top = L1->stack; L1->stack_last = L1->stack + L1->stacksize - EXTRA_STACK; /* initialize first ci */ ci = &L1->base_ci; ci->next = ci->previous = NULL; ci->callstatus = 0; ci->func = L1->top; setnilvalue(L1->top++); /* 'function' entry for this 'ci' */ ci->top = L1->top + LUA_MINSTACK; L1->ci = ci; }
執行緒
資料棧和呼叫棧構成了lua的執行緒, 同一個虛擬機器中不同執行緒共享了global_State
參考lua_newthread的建立過程:
// lua_newstate建立的是lua虛擬機器 LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) { /***.....**/ } // lua_newthread是執行緒 LUA_API lua_State *lua_newthread (lua_State *L) { global_State *g = G(L); lua_State *L1; lua_lock(L); luaC_checkGC(L); /* create new thread */ // LX: thread state + extra space L1 = &cast(LX *, luaM_newobject(L, LUA_TTHREAD, sizeof(LX)))->l; L1->marked = luaC_white(g); L1->tt = LUA_TTHREAD; // 型別 /* link it on list 'allgc' */ // 掛到垃圾回收上 L1->next = g->allgc; g->allgc = obj2gco(L1); /* anchor it on L stack */ setthvalue(L, L->top, L1); api_incr_top(L); preinit_thread(L1, g); L1->hookmask = L->hookmask; L1->basehookcount = L->basehookcount; L1->hook = L->hook; resethookcount(L1); /* initialize L1 extra space */ memcpy(lua_getextraspace(L1), lua_getextraspace(g->mainthread), LUA_EXTRASPACE); luai_userstatethread(L, L1); stack_init(L1, L); /* init stack */ lua_unlock(L); return L1; }
lua C API
一般的如lua_pushstring之類的理解不難, 現在看一個lua_pushvalue的程式碼:
LUA_API void lua_pushvalue (lua_State *L, int idx) { lua_lock(L); setobj2s(L, L->top, index2addr(L, idx)); // 頂部壓值 api_incr_top(L); lua_unlock(L); } // index2addr的實現 static TValue *index2addr (lua_State *L, int idx) { CallInfo *ci = L->ci; // 呼叫棧 if (idx > 0) { // 正索引 TValue *o = ci->func + idx; // 被呼叫函式的棧底+idx索引找到對應的值 api_check(L, idx <= ci->top - (ci->func + 1), "unacceptable index"); if (o >= L->top) return NONVALIDVALUE; else return o; } else if (!ispseudo(idx)) { /* negative index */ // 負索引 api_check(L, idx != 0 && -idx <= L->top - (ci->func + 1), "invalid index"); return L->top + idx; } else if (idx == LUA_REGISTRYINDEX) // 全域性 return &G(L)->l_registry; else { /* upvalues */ idx = LUA_REGISTRYINDEX - idx; api_check(L, idx <= MAXUPVAL + 1, "upvalue index too large"); if (ttislcf(ci->func)) /* light C function? */ return NONVALIDVALUE; /* it has no upvalues */ else { CClosure *func = clCvalue(ci->func); return (idx <= func->nupvalues) ? &func->upvalue[idx-1] : NONVALIDVALUE; } } }