一步步調試,在lparser.c文件中luaY_parser函數是語法分析的重點函數,詞法分析也是在這個過程中調用的。在這個過程中,用到一些數據結構,下面會詳細說。
/*
** Parser entry point. Sets up a lexer state over input stream `z' (using
** `buff' as the token buffer and `name' as the source name), opens the
** function state for the main function, parses the whole input, and
** returns the resulting function prototype.
*/
Proto *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff, const char *name) {
  struct LexState lexstate;
  struct FuncState funcstate;
  lexstate.buff = buff;
  luaX_setinput(L, &lexstate, z, luaS_new(L, name));
  open_func(&lexstate, &funcstate);  /* initialize funcstate */
  funcstate.f->is_vararg = VARARG_ISVARARG;  /* main func. is always vararg */
  luaX_next(&lexstate);  /* read the first token (a token, not a character) */
  chunk(&lexstate);  /* parse the statements of the chunk */
  check(&lexstate, TK_EOS);  /* the current token must now be end-of-stream */
  close_func(&lexstate);  /* close the main function's state */
  lua_assert(funcstate.prev == NULL);
  lua_assert(funcstate.f->nups == 0);
  lua_assert(lexstate.fs == NULL);
  return funcstate.f;
}
好,不着急,一步一步來看。lua_State ,LexState ,FuncState 是啥玩意呢?
lua_State保存一個Lua線程(協程)的運行狀態,在該線程的整個生命週期中一直存在。主線程以及它創建的每個協程各有一個自己的lua_State實例,它們通過l_G指針共享同一個global_State。
/*
** Lua state record: value-stack bounds, the CallInfo array of the running
** calls, debug-hook settings, and error-recovery fields, plus a pointer to
** the global state.
*/
struct lua_State {
  CommonHeader;
  lu_byte status;
  StkId top;  /* first free slot in the stack */
  StkId base;  /* base of current function */
  global_State *l_G;  /* pointer to the global state */
  CallInfo *ci;  /* call info for current function */
  const Instruction *savedpc;  /* `savedpc' of current function */
  StkId stack_last;  /* last free slot in the stack */
  StkId stack;  /* stack base */
  CallInfo *end_ci;  /* points after end of ci array (upper end of the call stack) */
  CallInfo *base_ci;  /* array of CallInfo's (bottom of the call stack) */
  int stacksize;
  int size_ci;  /* size of array `base_ci' */
  unsigned short nCcalls;  /* number of nested C calls */
  lu_byte hookmask;
  lu_byte allowhook;
  int basehookcount;
  int hookcount;
  lua_Hook hook;
  TValue l_gt;  /* table of globals */
  TValue env;  /* temporary place for environments */
  GCObject *openupval;  /* list of open upvalues in this stack */
  GCObject *gclist;
  struct lua_longjmp *errorJmp;  /* current error recover point */
  ptrdiff_t errfunc;  /* current error handling function (stack index) */
};
LexState是用於存儲詞法分析時的上下文數據。
typedef struct LexState { int current; /* current character (charint) */指向下一個要讀取的字符 int linenumber; /* input line counter */行號 int lastline; /* line of last token `consumed' */ Token t; /* current token */ Token lookahead; /* look ahead token */ 預讀的下一個token struct FuncState *fs; /* `FuncState' is private to the parser */函數狀態的數據結構 struct lua_State *L; ZIO *z; /* input stream */ 輸入流 Mbuffer *buff; /* buffer for tokens */ 臨時緩沖區 TString *source; /* current source name */ 源文件名 char decpoint; /* locale decimal point */ } LexState;
FuncState是用於存儲函數狀態的數據結構。
typedef struct FuncState { Proto *f; /* current function header */函數頭信息 Table *h; /* table to find (and reuse) elements in `k' */ struct FuncState *prev; /* enclosing function */指向函數鏈表的上一個函數 struct LexState *ls; /* lexical state */ struct lua_State *L; /* copy of the Lua state */ struct BlockCnt *bl; /* chain of current blocks */ int pc; /* next position to code (equivalent to `ncode') */ int lasttarget; /* `pc' of last `jump target' */ int jpc; /* list of pending jumps to `pc' */ int freereg; /* first free register */ int nk; /* number of elements in `k' */ int np; /* number of elements in `p' */ short nlocvars; /* number of elements in `locvars' */local變量個數 lu_byte nactvar; /* number of active local variables */ upvaldesc upvalues[LUAI_MAXUPVALUES]; /* upvalues */ unsigned short actvar[LUAI_MAXVARS]; /* declared-variable stack */ } FuncState;
初始化完成后,就要進行詞法分析,即讀取下一個token,調用luaX_next(&lexstate); 下面進入llex.c文件的源代碼中
/*
** Advance to the next token. Records the current line as `lastline', then
** either promotes a pending look-ahead token to the current token (and
** resets the look-ahead slot to TK_EOS) or calls llex to scan a new one.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    ls->t = ls->lookahead;  /* use this one */
    ls->lookahead.token = TK_EOS;  /* and discharge it */
  }
  else
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token via llex */
}
llex函數:里面是一大串的switch...case...語句,對各種可能的情況進行處理,正常的變量名或者保留字會進入default語句,分別處理空格,數字或者變量名。
/*
** Excerpt of the main scanning loop: dispatches on the current character.
** NOTE(review): the case labels below carry no bodies in this excerpt;
** presumably their handling was elided for brevity, so do not read them
** as falling through to `default' in the real function -- confirm against
** the full source.
*/
for (;;) {
  switch (ls->current) {
    /* bodies of the following cases are not shown in this excerpt */
    case '\n':
    case '\r':
    case '-':
    case '[':
    case '=':
    case '<':
    case '>':
    case '~':
    case '"':
    case '\'':
    case '.':
    case EOZ:
    default: {
      if (isspace(ls->current)) {
        /* whitespace: skip it and rescan */
        lua_assert(!currIsNewline(ls));
        next(ls);
        continue;
      }
      else if (isdigit(ls->current)) {
        /* leading digit: scan a numeral into seminfo */
        read_numeral(ls, seminfo);
        return TK_NUMBER;
      }
      else if (isalpha(ls->current) || ls->current == '_') {
        /* identifier or reserved word */
        TString *ts;
        /* accumulate letters, digits and underscores into the buffer */
        do {
          save_and_next(ls);
        } while (isalnum(ls->current) || ls->current == '_');
        ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                luaZ_bufflen(ls->buff));
        if (ts->tsv.reserved > 0)  /* reserved word? */
          return ts->tsv.reserved - 1 + FIRST_RESERVED;
        else {
          /* ordinary name: hand the string back through seminfo */
          seminfo->ts = ts;
          return TK_NAME;
        }
      }
      else {
        /* anything else: the character itself is the token */
        int c = ls->current;
        next(ls);
        return c;  /* single-char tokens (+ - / ...) */
      }
    }
  }
}
luaX_newstring用於為讀到的標識符生成字符串對象:如果全局字符串表中還沒有該字符串,則會創建新的字符串。所有保留字都會預先加載到全局字符串表中,並帶有reserved標記,因此,如果讀到的不是保留字,就會返回TK_NAME。保留字的判定來自於if (ts->tsv.reserved > 0),關於Token的種類,定義在llex.h頭文件中。
獲取token字符串后,進入chunk代碼:
/*
** Parse a sequence of statements, each optionally followed by `;'.
** Stops when statement() reports that it parsed a statement that must be
** last, or when block_follow() accepts the current token.
*/
static void chunk (LexState *ls) {
  /* chunk -> { stat [`;'] } */
  int islast = 0;
  enterlevel(ls);  /* presumably tracks the nesting depth; paired with leavelevel below */
  while (!islast && !block_follow(ls->t.token)) {
    islast = statement(ls);  /* parse one statement */
    testnext(ls, ';');
    lua_assert(ls->fs->f->maxstacksize >= ls->fs->freereg &&
               ls->fs->freereg >= ls->fs->nactvar);
    ls->fs->freereg = ls->fs->nactvar;  /* free registers */
  }
  leavelevel(ls);
}
statement函數用於分析語句,裡面也是一個大大的switch...case...語句。如果是if, while, do, for, function等等關鍵字,都會進入相應的處理函數中,在default分支中處理賦值和函數調用的分析。

/*
** Parse one statement, dispatching on the current token to the matching
** statement parser. Returns 1 when the statement must be the last one of
** its block (`return' and `break'), 0 otherwise.
*/
static int statement (LexState *ls) {
  const int start_line = ls->linenumber;  /* may be needed for error messages */
  int must_be_last = 0;
  switch (ls->t.token) {
    case TK_IF:  /* stat -> ifstat */
      ifstat(ls, start_line);
      break;
    case TK_WHILE:  /* stat -> whilestat */
      whilestat(ls, start_line);
      break;
    case TK_DO:  /* stat -> DO block END */
      luaX_next(ls);  /* skip DO */
      block(ls);
      check_match(ls, TK_END, TK_DO, start_line);
      break;
    case TK_FOR:  /* stat -> forstat */
      forstat(ls, start_line);
      break;
    case TK_REPEAT:  /* stat -> repeatstat */
      repeatstat(ls, start_line);
      break;
    case TK_FUNCTION:  /* stat -> funcstat */
      funcstat(ls, start_line);
      break;
    case TK_LOCAL:  /* stat -> localstat */
      luaX_next(ls);  /* skip LOCAL */
      if (testnext(ls, TK_FUNCTION)) {  /* local function? */
        localfunc(ls);
      }
      else {
        localstat(ls);
      }
      break;
    case TK_RETURN:  /* stat -> retstat */
      retstat(ls);
      must_be_last = 1;
      break;
    case TK_BREAK:  /* stat -> breakstat */
      luaX_next(ls);  /* skip BREAK */
      breakstat(ls);
      must_be_last = 1;
      break;
    default:  /* everything else is parsed as an expression statement */
      exprstat(ls);
      break;
  }
  return must_be_last;
}
語句中的表達式通過exprstat(ls)函數處理,還有lua代碼指令的生成,有時間再寫。