lua源碼學習篇二:語法分析


  

  一步步調試,在lparser.c文件中luaY_parser函數是語法分析的重點函數,詞法分析也是在這個過程中調用的。在這個過程中,用到一些數據結構,下面會詳細說。

  

/*
** Parser entry point: sets up the lexer on input stream `z', parses the
** whole input as the main function and returns its prototype.
** `buff' is the token buffer handed to the lexer; `name' is turned into
** a Lua string and passed to luaX_setinput together with the stream.
*/
Proto *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff, const char *name) {
  struct LexState lexstate;
  struct FuncState funcstate;
  lexstate.buff = buff;
  luaX_setinput(L, &lexstate, z, luaS_new(L, name));
  open_func(&lexstate, &funcstate);  /* initialize funcstate */
  funcstate.f->is_vararg = VARARG_ISVARARG;  /* main func. is always vararg */
  luaX_next(&lexstate);  /* read the first token (not a single character) */
  chunk(&lexstate);  /* parse the block of statements */
  check(&lexstate, TK_EOS);  /* the input must now be at its end */
  close_func(&lexstate);  /* finish the main function */
  lua_assert(funcstate.prev == NULL);
  lua_assert(funcstate.f->nups == 0);
  lua_assert(lexstate.fs == NULL);
  return funcstate.f;
}

  好,不着急,一步一步來看。lua_State ,LexState ,FuncState 是啥玩意呢?

   lua_state是lua程序運行過程中一直存在的,並且一個運行程序只有一個lua_State實例。

/*
** Lua state structure (the `L' passed to the parser and lexer).
** Holds the value stack, the CallInfo array, hook settings and the
** error-recovery information listed below.
*/
struct lua_State {
  CommonHeader;
  lu_byte status;
  StkId top;  /* first free slot in the stack */
  StkId base;  /* base of current function */
  global_State *l_G;  /* pointer to the global state */
  CallInfo *ci;  /* call info for current function */
  const Instruction *savedpc;  /* `savedpc' of current function */
  StkId stack_last;  /* last free slot in the stack */
  StkId stack;  /* stack base */
  CallInfo *end_ci;  /* points after end of ci array */
  CallInfo *base_ci;  /* array of CallInfo's (start of the call-info array) */
  int stacksize;
  int size_ci;  /* size of array `base_ci' */
  unsigned short nCcalls;  /* number of nested C calls */
  lu_byte hookmask;
  lu_byte allowhook;
  int basehookcount;
  int hookcount;
  lua_Hook hook;
  TValue l_gt;  /* table of globals */
  TValue env;  /* temporary place for environments */
  GCObject *openupval;  /* list of open upvalues in this stack */
  GCObject *gclist;
  struct lua_longjmp *errorJmp;  /* current error recover point */
  ptrdiff_t errfunc;  /* current error handling function (stack index) */
};

 

LexState是用於存儲詞法分析時的上下文數據。

typedef struct LexState {
  int current;  /* current character (charint) */指向下一個要讀取的字符
  int linenumber;  /* input line counter */行號
  int lastline;  /* line of last token `consumed' */
  Token t;  /* current token */
  Token lookahead;  /* look ahead token */ 預讀的下一個token
  struct FuncState *fs;  /* `FuncState' is private to the parser */函數狀態的數據結構
  struct lua_State *L;
  ZIO *z;  /* input stream */ 輸入流
  Mbuffer *buff;  /* buffer for tokens */ 臨時緩沖區
  TString *source;  /* current source name */ 源文件名
  char decpoint;  /* locale decimal point */
} LexState;

FuncState是用於存儲函數狀態的數據結構。
typedef struct FuncState {
  Proto *f;  /* current function header */函數頭信息
  Table *h;  /* table to find (and reuse) elements in `k' */
  struct FuncState *prev;  /* enclosing function */指向函數鏈表的上一個函數
  struct LexState *ls;  /* lexical state */
  struct lua_State *L;  /* copy of the Lua state */
  struct BlockCnt *bl;  /* chain of current blocks */
  int pc;  /* next position to code (equivalent to `ncode') */
  int lasttarget;   /* `pc' of last `jump target' */
  int jpc;  /* list of pending jumps to `pc' */
  int freereg;  /* first free register */
  int nk;  /* number of elements in `k' */
  int np;  /* number of elements in `p' */
  short nlocvars;  /* number of elements in `locvars' */local變量個數
  lu_byte nactvar;  /* number of active local variables */
  upvaldesc upvalues[LUAI_MAXUPVALUES];  /* upvalues */
  unsigned short actvar[LUAI_MAXVARS];  /* declared-variable stack */
} FuncState;

初始化完成后,就要進行詞法分析,即讀取下一個token,調用
luaX_next(&lexstate); 下面進入llex.c文件的源代碼中
/*
** Advance the lexer by one token. Records the current line number as
** `lastline', then makes `ls->t' the next token: the pending look-ahead
** token if there is one, otherwise a token freshly scanned by llex.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    ls->t = ls->lookahead;  /* use this one */
    ls->lookahead.token = TK_EOS;  /* and discharge it */
  }
  else {
    /* no look-ahead pending: call llex to read the next token */
    ls->t.token = llex(ls, &ls->t.seminfo);
  }
}

llex函數:里面是一大串的switch...case...語句,對各種可能的情況進行處理,正常的變量名或者保留字會進入default語句,分別處理空格,數字或者變量名。

/*
** Abridged excerpt of the scanning loop inside llex. It loops until a
** token is recognized and returned.
** NOTE(review): the statements belonging to the individual `case'
** labels are not shown in this excerpt; only the `default' branch is
** reproduced in full. As printed, every label would fall through to
** `default'.
*/
for (;;) {
    switch (ls->current) {
      /* special characters; their handlers are omitted in this excerpt */
      case '\n':
      case '\r': 
      case '-': 
      case '[': 
      case '=': 
      case '<': 
      case '>': 
      case '~': 
      case '"':
      case '\'': 
      case '.': 
      case EOZ:
      default: {
        if (isspace(ls->current)) {
          /* whitespace: skip it and keep scanning */
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        else if (isdigit(ls->current)) {
          /* starts with a digit: read a numeral */
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        else if (isalpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          /* collect letters, digits and underscores into the buffer */
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          if (ts->tsv.reserved > 0)  /* reserved word? */
            /* map the reserved-word index to its token code */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          else {
            /* ordinary name: hand the string back through seminfo */
            seminfo->ts = ts;
            return TK_NAME;
          }
        }
        else {
          /* anything else: the character itself is the token */
          int c = ls->current;
          next(ls);
          return c;  /* single-char tokens (+ - / ...) */
        }
      }
    }
  }

luaX_newstring用於生成標識符對應的字符串對象:如果全局字符串表中還沒有該字符串,則會創建新的字符串。所有保留字都會預先加載到字符串表中並帶有reserved標記,因此,如果讀到的不是保留字,就會生成TK_NAME。保留字的判定來自於if (ts->tsv.reserved > 0),關於Token的種類,定義在llex.h頭文件中。

 

獲取token字符串后,進入chunk代碼:

/*
** Parse a sequence of statements:
**   chunk -> { stat [`;'] }
** The loop ends after a statement that reports itself as the last one,
** or when block_follow accepts the current token.
*/
static void chunk (LexState *ls) {
  int islast = 0;
  enterlevel(ls);  /* one more nesting level */
  for (;;) {
    if (islast || block_follow(ls->t.token))
      break;  /* nothing more to parse in this chunk */
    islast = statement(ls);  /* parse one statement */
    testnext(ls, ';');  /* optional `;' after the statement */
    lua_assert(ls->fs->f->maxstacksize >= ls->fs->freereg &&
               ls->fs->freereg >= ls->fs->nactvar);
    /* free registers: reset the first free one to the active-local count */
    ls->fs->freereg = ls->fs->nactvar;
  }
  leavelevel(ls);
}

statement函數用於分析語句,里面也是一個大大的switch...case...語句。如果是if, while, do, for, function等等關鍵字,都會進入相應的處理函數中,在default語句中處理賦值和函數調用的分析。

 

/*
** Parse a single statement, dispatching on the current token.
** Returns 1 when the statement must be the last one of its block
** (`return' and `break'), 0 otherwise.
*/
static int statement (LexState *ls) {
  const int line = ls->linenumber;  /* may be needed for error messages */
  int must_be_last = 0;
  switch (ls->t.token) {
    case TK_IF:  /* stat -> ifstat */
      ifstat(ls, line);
      break;
    case TK_WHILE:  /* stat -> whilestat */
      whilestat(ls, line);
      break;
    case TK_DO:  /* stat -> DO block END */
      luaX_next(ls);  /* skip DO */
      block(ls);
      check_match(ls, TK_END, TK_DO, line);
      break;
    case TK_FOR:  /* stat -> forstat */
      forstat(ls, line);
      break;
    case TK_REPEAT:  /* stat -> repeatstat */
      repeatstat(ls, line);
      break;
    case TK_FUNCTION:  /* stat -> funcstat */
      funcstat(ls, line);
      break;
    case TK_LOCAL:  /* stat -> localstat */
      luaX_next(ls);  /* skip LOCAL */
      if (testnext(ls, TK_FUNCTION)) {  /* local function? */
        localfunc(ls);
      }
      else {
        localstat(ls);
      }
      break;
    case TK_RETURN:  /* stat -> retstat */
      retstat(ls);
      must_be_last = 1;
      break;
    case TK_BREAK:  /* stat -> breakstat */
      luaX_next(ls);  /* skip BREAK */
      breakstat(ls);
      must_be_last = 1;
      break;
    default:  /* everything else is handled by exprstat */
      exprstat(ls);
      break;
  }
  return must_be_last;
}
View Code

 

語句中的表達式通過exprstat(ls)函數處理,還有lua代碼指令的生成,有時間再寫。



 
       


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM