前端編譯原理 parser.js源碼解讀


前面已經介紹了jison的使用,在正常開發中其實已經夠用了。下面主要是對jison生成的parser.js代碼做一些解讀,作為進一步的了解。

     下面以最簡單的文法產生的parser做一些代碼注釋

下面的代碼中加了一些注釋,標示了編譯過程,從中可以了解jison產生的編譯器做了哪些處理。

/**
 * Annotated jison-generated LALR(1) parser for the single-rule grammar
 * `E -> NAT`, bundled with its regex-based lexer.
 *
 * `a` is a `Parser` instance whose prototype is the `parser` object below:
 *   - `a.parse(input)` returns `true` when `input` is accepted; failures are
 *     reported through `parseError`, which throws by default.
 *   - `a.lexer` is the tokenizer and `a.Parser` is the constructor.
 */
var a = (function() {
    var parser = {
        // Debug hook; a no-op here. parseError calls it for recoverable errors.
        trace: function trace() {},
        // Shared object; its own properties are copied into the per-parse `yy`.
        yy: {},
        // Symbol name -> id. $accept (0), $end (1) and error (2) are built in;
        // E (3) and NAT (4) come from the grammar.
        symbols_: { error: 2, E: 3, NAT: 4, $accept: 0, $end: 1 },
        // Terminal id -> name.
        terminals_: { 2: 'error', 4: 'NAT' },
        // Grammar productions, indexed by production number.
        productions_: [
            0, // augmented start rule: $accept -> E $end
            [
                3, // left-hand side: symbol 3 (E)
                1 // right-hand side length: pop one entry (NAT), then push E
            ] // E -> NAT
        ],
        /**
         * Semantic action executed on every reduction.
         *
         * Called with `this` set to the parser's `yyval` object, so assigning
         * `this.$` sets the value pushed for the reduced symbol.
         *
         * @param {string} yytext   text of the most recently shifted token
         * @param {number} yyleng   length of that text
         * @param {number} yylineno line number of that token (0-based)
         * @param {Object} yy       per-parse shared state
         * @param {number} yystate  production number being reduced (action[1])
         * @param {Array}  $$       semantic value stack (vstack)
         * @param {Array}  _$       location stack (lstack)
         * @returns {undefined} a defined return value would make parse() return it immediately
         */
        performAction: function anonymous(
            yytext,
            yyleng,
            yylineno,
            yy,
            yystate /* action[1] */,
            $$ /* vstack */,
            _$ /* lstack */
        ) {
            var $0 = $$.length - 1;
            switch (yystate) {
                case 1:
                    // E -> NAT: coerce the matched digits to a number.
                    this.$ = $$[$0] * 1;
                    break;
            }
        },
        // LALR(1) shift/reduce table; one entry per automaton state.
        // A plain number is a goto target. An array is an action:
        // [1, s] = shift and enter state s, [2, p] = reduce by production p,
        // [3] = accept.
        table: [
            // State 0: on E (3) goto state 1; on NAT (4) shift and enter state 2.
            { 3: 1, 4: [1, 2] },
            // State 1: on $end (1) accept.
            { 1: [3] },
            // State 2: on $end (1) reduce by productions_[1].
            { 1: [2, 1] }
        ],
        // States that always take the same action, so no lookahead token is
        // read first: state 2 always reduces by production 1.
        defaultActions: {
            2: [2, 1]
        },
        /**
         * Default error reporter.
         *
         * @param {string} str  human-readable message
         * @param {Object} hash error details; `hash.recoverable` selects tracing over throwing
         * @throws {Error} with `.hash` attached, unless `hash.recoverable` is truthy
         */
        parseError: function parseError(str, hash) {
            if (hash.recoverable) {
                this.trace(str);
            } else {
                var error = new Error(str);
                error.hash = hash;
                throw error;
            }
        },
        /**
         * Runs the shift/reduce loop over `input`.
         *
         * Arguments after `input` are forwarded to performAction.
         *
         * @param {string} input text to parse
         * @returns {*} `true` on accept, or the first defined value returned by performAction
         * @throws {Error} on a syntax or lexical error; also when a custom
         *     parseError returns instead of throwing, because this grammar has
         *     no error-recovery rules to continue with
         */
        parse: function parse(input) {
            var self = this,
                // Alternating [state, symbol, state, ...]; starts in state 0.
                stack = [0],
                // Semantic values, parallel to the symbols on `stack`.
                vstack = [null],
                // Source locations, parallel to vstack.
                lstack = [],
                table = this.table,
                yytext = '',
                yylineno = 0,
                yyleng = 0,
                TERROR = 2,
                EOF = 1;
            var args = lstack.slice.call(arguments, 1);
            // Each parse gets its own lexer object so lexer state is not shared.
            var lexer = Object.create(this.lexer);
            var sharedState = { yy: {} };
            for (var k in this.yy) {
                if (Object.prototype.hasOwnProperty.call(this.yy, k)) {
                    sharedState.yy[k] = this.yy[k];
                }
            }
            lexer.setInput(input, sharedState.yy);
            sharedState.yy.lexer = lexer;
            sharedState.yy.parser = this;
            if (typeof lexer.yylloc === 'undefined') {
                lexer.yylloc = {};
            }
            var yyloc = lexer.yylloc;
            lstack.push(yyloc);
            // When set, locations also carry a [start, end] offset range.
            var ranges = lexer.options && lexer.options.ranges;
            // A parseError supplied through `yy` overrides the default one.
            if (typeof sharedState.yy.parseError === 'function') {
                this.parseError = sharedState.yy.parseError;
            } else {
                this.parseError = Object.getPrototypeOf(this).parseError;
            }
            // Fetches the next token and maps a token name to its numeric id.
            var lex = function() {
                var token;
                token = lexer.lex() || EOF;
                if (typeof token !== 'number') {
                    token = self.symbols_[token] || token;
                }
                return token;
            };
            var symbol,
                state,
                action,
                r,
                yyval = {},
                p,
                len,
                newState,
                expected;
            while (true) {
                state = stack[stack.length - 1];
                if (this.defaultActions[state]) {
                    action = this.defaultActions[state];
                } else {
                    if (symbol === null || typeof symbol === 'undefined') {
                        // No lookahead buffered: ask the lexer for the next token.
                        symbol = lex();
                    }
                    // Look up the action for (state, lookahead).
                    action = table[state] && table[state][symbol];
                }
                // No valid action: syntax error.
                if (typeof action === 'undefined' || !action.length || !action[0]) {
                    var errStr = '';
                    expected = [];
                    for (p in table[state]) {
                        if (this.terminals_[p] && p > TERROR) {
                            expected.push("'" + this.terminals_[p] + "'");
                        }
                    }
                    if (lexer.showPosition) {
                        errStr =
                            'Parse error on line ' +
                            (yylineno + 1) +
                            ':\n' +
                            lexer.showPosition() +
                            '\nExpecting ' +
                            expected.join(', ') +
                            ", got '" +
                            (this.terminals_[symbol] || symbol) +
                            "'";
                    } else {
                        errStr =
                            'Parse error on line ' +
                            (yylineno + 1) +
                            ': Unexpected ' +
                            (symbol == EOF ? 'end of input' : "'" + (this.terminals_[symbol] || symbol) + "'");
                    }
                    var hash = {
                        text: lexer.match,
                        token: this.terminals_[symbol] || symbol,
                        line: lexer.yylineno,
                        loc: yyloc,
                        expected: expected
                    };
                    this.parseError(errStr, hash);
                    // parseError returned instead of throwing. There is no
                    // error-recovery rule to resume with, so halt explicitly
                    // rather than dereferencing the invalid action below.
                    var haltError = new Error(errStr);
                    haltError.hash = hash;
                    throw haltError;
                }
                // Unresolved reduce/reduce or shift/reduce conflict in the table.
                if (action[0] instanceof Array && action.length > 1) {
                    throw new Error('Parse Error: multiple actions possible at state: ' + state + ', token: ' + symbol);
                }
                switch (action[0]) {
                    // Shift: push the token and the target state, consume the lookahead.
                    case 1:
                        stack.push(symbol);
                        vstack.push(lexer.yytext);
                        lstack.push(lexer.yylloc);
                        stack.push(action[1]);
                        symbol = null;
                        yyleng = lexer.yyleng;
                        yytext = lexer.yytext;
                        yylineno = lexer.yylineno;
                        yyloc = lexer.yylloc;
                        break;
                    // Reduce by production action[1].
                    case 2:
                        len = this.productions_[action[1]][1];
                        // Default semantic value: that of the first right-hand-side symbol.
                        yyval.$ = vstack[vstack.length - len];
                        // Default location: spans first to last right-hand-side symbol.
                        yyval._$ = {
                            first_line: lstack[lstack.length - (len || 1)].first_line,
                            last_line: lstack[lstack.length - 1].last_line,
                            first_column: lstack[lstack.length - (len || 1)].first_column,
                            last_column: lstack[lstack.length - 1].last_column
                        };
                        if (ranges) {
                            yyval._$.range = [
                                lstack[lstack.length - (len || 1)].range[0],
                                lstack[lstack.length - 1].range[1]
                            ];
                        }
                        // Run the grammar's semantic action with `this` bound to yyval.
                        r = this.performAction.apply(
                            yyval,
                            [yytext, yyleng, yylineno, sharedState.yy, action[1], vstack, lstack].concat(args)
                        );
                        if (typeof r !== 'undefined') {
                            return r;
                        }
                        if (len) {
                            // `stack` holds a symbol and a state per entry, hence 2 * len.
                            stack = stack.slice(0, -1 * len * 2);
                            vstack = vstack.slice(0, -1 * len);
                            lstack = lstack.slice(0, -1 * len);
                        }
                        stack.push(this.productions_[action[1]][0]);
                        vstack.push(yyval.$);
                        lstack.push(yyval._$);
                        // Goto: next state for (exposed state, reduced nonterminal).
                        newState = table[stack[stack.length - 2]][stack[stack.length - 1]];
                        stack.push(newState);
                        break;
                    // Accept.
                    case 3:
                        return true;
                }
            }
        }
    };
    /* Lexer (tokenizer) */
    var lexer = (function() {
        var lexer = {
            EOF: 1,

            /**
             * Reports a lexical error through the parser's parseError when a
             * parser is attached to `yy`; otherwise throws a plain Error.
             */
            parseError: function parseError(str, hash) {
                if (this.yy.parser) {
                    this.yy.parser.parseError(str, hash);
                } else {
                    throw new Error(str);
                }
            },

            /**
             * Resets the lexer and sets new input.
             *
             * @param {string} input text to tokenize
             * @param {Object} [yy]  shared state; falls back to the current `yy` or `{}`
             * @returns {Object} this lexer
             */
            setInput: function(input, yy) {
                this.yy = yy || this.yy || {};
                this._input = input;
                this._more = this._backtrack = this.done = false;
                this.yylineno = this.yyleng = 0;
                this.yytext = this.matched = this.match = '';
                this.conditionStack = ['INITIAL'];
                this.yylloc = {
                    first_line: 1,
                    first_column: 0,
                    last_line: 1,
                    last_column: 0
                };
                if (this.options.ranges) {
                    this.yylloc.range = [0, 0];
                }
                this.offset = 0;
                return this;
            },
            /**
             * Consumes one character from the input, appending it to
             * yytext/match/matched and advancing the location counters.
             *
             * @returns {string} the consumed character
             */
            input: function() {
                var ch = this._input[0];
                this.yytext += ch;
                this.yyleng++;
                this.offset++;
                this.match += ch;
                this.matched += ch;
                var lines = ch.match(/(?:\r\n?|\n).*/g);
                if (lines) {
                    this.yylineno++;
                    this.yylloc.last_line++;
                } else {
                    this.yylloc.last_column++;
                }
                if (this.options.ranges) {
                    this.yylloc.range[1]++;
                }

                this._input = this._input.slice(1);
                return ch;
            },
            /**
             * Pushes `ch` back onto the front of the remaining input and
             * removes it from the end of yytext, match and matched.
             *
             * @param {string} ch text to push back (may be several characters)
             * @returns {Object} this lexer
             */
            unput: function(ch) {
                var len = ch.length;
                var lines = ch.split(/(?:\r\n?|\n)/g);

                this._input = ch + this._input;
                this.yytext = this.yytext.substr(0, this.yytext.length - len);
                // yyleng is recomputed from yytext at the end; the range
                // calculation below still needs the old value.
                this.offset -= len;
                var oldLines = this.match.split(/(?:\r\n?|\n)/g);
                // Trim the full pushed-back length, not just one character, so
                // match/matched stay in step with yytext.
                this.match = this.match.substr(0, this.match.length - len);
                this.matched = this.matched.substr(0, this.matched.length - len);

                if (lines.length - 1) {
                    this.yylineno -= lines.length - 1;
                }
                var r = this.yylloc.range;

                this.yylloc = {
                    first_line: this.yylloc.first_line,
                    last_line: this.yylineno + 1,
                    first_column: this.yylloc.first_column,
                    last_column: lines
                        ? (lines.length === oldLines.length ? this.yylloc.first_column : 0) +
                          oldLines[oldLines.length - lines.length].length -
                          lines[0].length
                        : this.yylloc.first_column - len
                };

                if (this.options.ranges) {
                    this.yylloc.range = [r[0], r[0] + this.yyleng - len];
                }
                this.yyleng = this.yytext.length;
                return this;
            },
            /**
             * Makes the next match append to the current yytext/match instead
             * of replacing them (see the `_more` check in next()).
             */
            more: function() {
                this._more = true;
                return this;
            },
            /**
             * Requests backtracking so next() tries the following rules.
             * Only valid when options.backtrack_lexer is set; otherwise it
             * reports a lexical error.
             */
            reject: function() {
                if (this.options.backtrack_lexer) {
                    this._backtrack = true;
                } else {
                    return this.parseError(
                        'Lexical error on line ' +
                            (this.yylineno + 1) +
                            '. You can only invoke reject() in the lexer when the lexer is of the backtracking persuasion (options.backtrack_lexer = true).\n' +
                            this.showPosition(),
                        {
                            text: '',
                            token: null,
                            line: this.yylineno
                        }
                    );
                }
                return this;
            },
            /**
             * Keeps the first `n` characters of the current match and pushes
             * the rest back onto the input.
             */
            less: function(n) {
                this.unput(this.match.slice(n));
            },
            /**
             * Returns up to 20 characters of already-matched input preceding
             * the current match, newlines removed, prefixed with '...' when
             * truncated.
             */
            pastInput: function() {
                var past = this.matched.substr(0, this.matched.length - this.match.length);
                return (past.length > 20 ? '...' : '') + past.substr(-20).replace(/\n/g, '');
            },
            /**
             * Returns up to 20 characters starting at the current match,
             * newlines removed, suffixed with '...' when truncated.
             */
            upcomingInput: function() {
                var next = this.match;
                if (next.length < 20) {
                    next += this._input.substr(0, 20 - next.length);
                }
                return (next.substr(0, 20) + (next.length > 20 ? '...' : '')).replace(/\n/g, '');
            },
            /**
             * Returns a two-line string: the input around the current position
             * and a dashed pointer line ending in '^' under the current match.
             */
            showPosition: function() {
                var pre = this.pastInput();
                var c = new Array(pre.length + 1).join('-');
                return pre + this.upcomingInput() + '\n' + c + '^';
            },
            /**
             * Commits a regex match, runs the rule's action and returns its token.
             *
             * @param {Array}  match        result of String#match for the rule
             * @param {number} indexed_rule index of the rule in `rules`
             * @returns {*} the token returned by the action, or `false` when the
             *     action produced no token (state is restored first if the
             *     action called reject())
             */
            test_match: function(match, indexed_rule) {
                var token, lines, backup;

                if (this.options.backtrack_lexer) {
                    // Save the state so it can be restored if the action rejects.
                    backup = {
                        yylineno: this.yylineno,
                        yylloc: {
                            first_line: this.yylloc.first_line,
                            // Read from yylloc: the lexer itself has no
                            // `last_line` property.
                            last_line: this.yylloc.last_line,
                            first_column: this.yylloc.first_column,
                            last_column: this.yylloc.last_column
                        },
                        yytext: this.yytext,
                        match: this.match,
                        matches: this.matches,
                        matched: this.matched,
                        yyleng: this.yyleng,
                        offset: this.offset,
                        _more: this._more,
                        _input: this._input,
                        yy: this.yy,
                        conditionStack: this.conditionStack.slice(0),
                        done: this.done
                    };
                    if (this.options.ranges) {
                        backup.yylloc.range = this.yylloc.range.slice(0);
                    }
                }

                lines = match[0].match(/(?:\r\n?|\n).*/g);
                if (lines) {
                    this.yylineno += lines.length;
                }
                // The new token starts where the previous one ended.
                this.yylloc = {
                    first_line: this.yylloc.last_line,
                    last_line: this.yylineno + 1,
                    first_column: this.yylloc.last_column,
                    last_column: lines
                        ? lines[lines.length - 1].length - lines[lines.length - 1].match(/\r?\n?/)[0].length
                        : this.yylloc.last_column + match[0].length
                };
                this.yytext += match[0];
                this.match += match[0];
                this.matches = match;
                this.yyleng = this.yytext.length;
                if (this.options.ranges) {
                    this.yylloc.range = [this.offset, (this.offset += this.yyleng)];
                }
                this._more = false;
                this._backtrack = false;
                this._input = this._input.slice(match[0].length);
                this.matched += match[0];
                token = this.performAction.call(
                    this,
                    this.yy,
                    this,
                    indexed_rule,
                    this.conditionStack[this.conditionStack.length - 1]
                );
                // The action may have pushed input back (e.g. via unput).
                if (this.done && this._input) {
                    this.done = false;
                }
                if (token) {
                    return token;
                } else if (this._backtrack) {
                    // The action called reject(): restore the saved state.
                    for (var k in backup) {
                        this[k] = backup[k];
                    }
                    return false;
                }
                return false;
            },
            /**
             * Tries to produce the next token.
             *
             * @returns {*} a token, EOF at end of input, or `false` when the
             *     matched rule produced no token (e.g. skipped whitespace)
             */
            next: function() {
                if (this.done) {
                    return this.EOF;
                }
                if (!this._input) {
                    this.done = true;
                }

                var token, match, tempMatch, index;
                if (!this._more) {
                    this.yytext = '';
                    this.match = '';
                }
                // The active rule set is chosen by the top of conditionStack.
                // This grammar's lexer actions never call begin/pushState/
                // popState, so only 'INITIAL' is ever used here.
                var rules = this._currentRules();
                for (var i = 0; i < rules.length; i++) {
                    tempMatch = this._input.match(this.rules[rules[i]]);
                    if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
                        match = tempMatch;
                        index = i;
                        if (this.options.backtrack_lexer) {
                            // Backtracking lexer: try this rule immediately; an
                            // action that calls reject() moves on to later rules.
                            token = this.test_match(tempMatch, rules[i]);
                            if (token !== false) {
                                return token;
                            } else if (this._backtrack) {
                                match = false;
                                continue;
                            } else {
                                return false;
                            }
                        } else if (!this.options.flex) {
                            // flex unset: the first matching rule wins.
                            // flex set: keep scanning for the longest match.
                            break;
                        }
                    }
                }
                if (match) {
                    token = this.test_match(match, rules[index]);
                    if (token !== false) {
                        return token;
                    }
                    return false;
                }
                if (this._input === '') {
                    return this.EOF;
                } else {
                    // No lexer rule matches the remaining input.
                    return this.parseError(
                        'Lexical error on line ' + (this.yylineno + 1) + '. Unrecognized text.\n' + this.showPosition(),
                        {
                            text: '',
                            token: null,
                            line: this.yylineno
                        }
                    );
                }
            },
            /**
             * Returns the next token, retrying next() until it yields a
             * truthy value (this skips rules that produce no token).
             */
            lex: function lex() {
                var r = this.next();
                if (r) {
                    return r;
                } else {
                    return this.lex();
                }
            },
            // Pushes a start condition onto conditionStack.
            begin: function begin(condition) {
                this.conditionStack.push(condition);
            },
            // Pops and returns the current start condition; the bottom entry
            // is never removed and is returned as-is.
            popState: function popState() {
                var n = this.conditionStack.length - 1;
                if (n > 0) {
                    return this.conditionStack.pop();
                } else {
                    return this.conditionStack[0];
                }
            },
            // Returns the rule indexes active for the current start condition,
            // falling back to 'INITIAL' when the stack top is empty or falsy.
            _currentRules: function _currentRules() {
                if (this.conditionStack.length && this.conditionStack[this.conditionStack.length - 1]) {
                    return this.conditions[this.conditionStack[this.conditionStack.length - 1]].rules;
                } else {
                    return this.conditions['INITIAL'].rules;
                }
            },
            // Returns the start condition |n| entries below the top of the
            // stack (n defaults to 0), or 'INITIAL' when out of range.
            topState: function topState(n) {
                n = this.conditionStack.length - 1 - Math.abs(n || 0);
                if (n >= 0) {
                    return this.conditionStack[n];
                } else {
                    return 'INITIAL';
                }
            },
            // Alias for begin().
            pushState: function pushState(condition) {
                this.begin(condition);
            },
            // Returns the number of entries on conditionStack.
            stateStackSize: function stateStackSize() {
                return this.conditionStack.length;
            },
            options: {},
            /**
             * Lexer rule actions, keyed by rule index.
             *
             * @param {Object} yy       shared state
             * @param {Object} yy_      the lexer itself
             * @param {number} $avoiding_name_collisions index of the matched rule
             * @param {string} YY_START current start condition (unused here)
             * @returns {*} token for the rule, or undefined to produce no token
             */
            performAction: function anonymous(yy, yy_, $avoiding_name_collisions, YY_START) {
                switch ($avoiding_name_collisions) {
                    case 0 /* skip whitespace */:
                        break;
                    case 1:
                        // Token id 4 is NAT in the parser's symbols_.
                        return 4;
                    case 2:
                        // '+' has no entry in the parser's symbols_ or table,
                        // so the parser reports it as a parse error.
                        return '+';
                }
            },
            // Lexer rules; the index is the rule number passed to performAction.
            rules: [
                /^(?:\s+)/,
                /^(?:[0-9]+)/,
                /^(?:\+)/
            ],
            // Start conditions and the rules active in each.
            conditions: {
                INITIAL: { rules: [0, 1, 2], inclusive: true }
            }
        };
        return lexer;
    })();
    parser.lexer = lexer;
    // Constructor giving each instance its own `yy` while sharing everything
    // else through the `parser` prototype.
    function Parser() {
        this.yy = {};
    }
    Parser.prototype = parser;
    parser.Parser = Parser;
    return new Parser();
})();

 

     

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM