// 實驗存檔
輸入示例
main() { int a, b; a = 10; b = a + 20; }
效果圖
全部代碼
編輯一份.html文件,將代碼拷入,作為網頁打開即可使用。
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>Lexical_Analysis</title>
  <link href="https://fonts.googleapis.com/css?family=Noto+Serif+SC" rel="stylesheet">
  <style>
    main {
      /* Lay out the child elements with flexbox */
      display: flex;
      /* Let the flex items wrap when necessary */
      flex-wrap: wrap;
      /* Center the flex items */
      justify-content: center;
    }

    textarea,
    button {
      font-family: 'Noto Serif SC', STFangSong, serif;
      font-size: 17px;
    }
  </style>
</head>
<body>
  <main>
    <textarea name="input" rows="20" cols="40"></textarea>
    <textarea name="output" rows="20" cols="40"></textarea>
    <button name="compile">Lexical Analysis</button>
  </main>
  <script>
    const inputBox = document.querySelector("textarea[name=input]");
    const outputBox = document.querySelector("textarea[name=output]");
    const btnCompile = document.querySelector("button[name=compile]");

    // On click, tokenize the input box contents and show the result as JSON
    // in the output box.
    btnCompile.addEventListener("click", event => {
      const inputCode = inputBox.value;
      outputBox.value = JSON.stringify(Lexical_Analysis(inputCode));
    });

    /*
     * Rules:
     *   Reserved words: if, int, for, while, do, return, break, continue;
     *     token type code 1.
     *   Every other word is an identifier; token type code 2.
     *   Constants are unsigned integers; token type code 3.
     *   Operators: + - * / = > < >= <= != ; token type code 4.
     *   Separators: , ; { } ( ) ; token type code 5.
     */
    const reservedWords = ['if', 'int', 'for', 'while', 'do', 'return', 'break', 'continue'];
    const operators = ['+', '-', '*', '/', '=', '<', '>', '!', '>=', '<=', '!='];
    const separators = [',', ';', '{', '}', '(', ')'];

    /**
     * Splits source text into a flat list of tokens.
     *
     * Words consist of ASCII letters only and numbers of ASCII digits only,
     * so "a1" is scanned as the word "a" followed by the number "1".
     *
     * @param {string} str - Source text to scan.
     * @returns {Array<{type: number, value: string}>|string} The tokens in
     *   source order, or an error message string as soon as a character that
     *   cannot start any token is met.
     */
    function Lexical_Analysis(str) {
      // Index of the current character; str[cur] is the current character.
      let cur = 0;
      // Final result of the lexical analysis.
      const tokens = [];

      while (cur < str.length) {
        if (/\s/.test(str[cur])) {
          // Skip whitespace.
          cur++;
        } else if (/[a-z]/i.test(str[cur])) {
          // Read a word: keep consuming letters; `cur < str.length` guards
          // against running past the end of the input.
          let word = "" + str[cur++];
          while (cur < str.length && /[a-z]/i.test(str[cur])) {
            word += str[cur++];
          }
          if (reservedWords.includes(word)) {
            // Reserved word (keyword).
            tokens.push({
              type: 1,
              value: word,
            });
          } else {
            // Ordinary identifier.
            tokens.push({
              type: 2,
              value: word,
            });
          }
        } else if (separators.includes(str[cur])) {
          // Store the separator and advance past it.
          tokens.push({
            type: 5,
            value: str[cur++],
          });
        } else if (operators.includes(str[cur])) {
          let operator = "" + str[cur++];
          if (['>', '<', '!'].includes(operator)) {
            // Only when the next character really is '=' does it belong to
            // this operator (>=, <=, !=). This must be a comparison: an
            // assignment here is always truthy and would swallow whatever
            // character follows.
            if (str[cur] === '=') {
              operator += str[cur++];
            }
          }
          tokens.push({
            type: 4,
            value: operator,
          });
        } else if (/[0-9]/.test(str[cur])) {
          // Read an unsigned integer; `cur < str.length` guards against
          // running past the end of the input.
          let val = "" + str[cur++];
          while (cur < str.length && /[0-9]/.test(str[cur])) {
            val += str[cur++];
          }
          tokens.push({
            type: 3,
            value: val,
          });
        } else {
          // Character that starts no known token: report it and stop.
          return "包含非法字符:" + str[cur];
        }
      }
      return tokens;
    }
  </script>
</body>
</html>
附件,龍書2.6節練習:

<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title></title>
  <link href="https://fonts.googleapis.com/css?family=Noto+Serif+SC" rel="stylesheet">
  <style>
    main {
      /* Lay out the child elements with flexbox */
      display: flex;
      /* Let the flex items wrap when necessary */
      flex-wrap: wrap;
      /* Center the flex items */
      justify-content: center;
    }

    textarea,
    button {
      font-family: 'Noto Serif SC', STFangSong, serif;
      font-size: 17px;
    }
  </style>
</head>
<body>
  <main>
    <textarea name="input" rows="20" cols="40"></textarea>
    <textarea name="output" rows="20" cols="40"></textarea>
    <button name="execute">Execute</button>
  </main>
  <script>
    const inputBox = document.querySelector("textarea[name=input]");
    const outputBox = document.querySelector("textarea[name=output]");
    const btnExecute = document.querySelector("button[name=execute]");

    // On click, tokenize the input box contents and log the tokens to the
    // console.
    btnExecute.addEventListener("click", event => {
      const tokens = tokenizer(inputBox.value);
      console.log(tokens);
    });

    /**
     * Splits the input text into tokens.
     *
     * Token shapes produced:
     *   - { tag: 'NUM', value: number }       digits with at most one '.'
     *   - { tag: 'ID', lexeme: string }       run of ASCII letters; the same
     *                                         object is reused for a repeated
     *                                         lexeme
     *   - { tag: '邏輯運算符', value: string }  >, <, !, >=, <=, ==, !=
     *   - { tag: <char> }                     any other single character,
     *                                         including a lone '='
     *
     * Spaces, tabs, newlines, `//` line comments and block comments are
     * skipped. Scanning stops at the end of input or at the first syntax
     * error (which is logged to the console); tokens collected so far are
     * still returned.
     *
     * @param {string} input - Text to tokenize.
     * @returns {Array<Object>} Tokens in source order.
     */
    function tokenizer(input) {
      const s = input;
      let cur = 0;
      // Lookahead character; it survives between scan() calls.
      let peek = ' ';
      // Current line number; updated while scanning but not read anywhere in
      // this function.
      let line = 1;
      // Identifier table: lexeme -> token object.
      const words = new Map();

      // Returns the next character (undefined past the end) and advances.
      const readChar = () => s[cur++];
      // Steps back one character.
      const undo = () => cur--;

      // Returns the next token, or null at end of input / on a syntax error.
      const scan = () => {
        // Skip whitespace and comments. This repeats so that several
        // comments in a row (with or without whitespace between them) are
        // all skipped before a token is read.
        for (;;) {
          // Skip whitespace; the peek left over from the previous call is
          // examined first rather than discarded.
          for (;; peek = readChar()) {
            if (peek === undefined) {
              return null; // end of input
            } else if (peek === ' ' || peek === '\t') {
              continue; // skip spaces and tabs
            } else if (peek === '\n') {
              line++; // track the current line
            } else {
              break;
            }
          }

          if (peek !== '/') {
            break; // a real token starts here
          }

          // A '/' at this point must open a comment.
          peek = readChar();
          if (peek === '/') {
            // Line comment: runs to the end of the line.
            peek = readChar();
            for (;; peek = readChar()) {
              if (peek === '\n') {
                break; // normal exit; the newline is consumed by the whitespace skip
              } else if (peek === undefined) {
                return null; // input ended inside the comment: normal exit
              }
            }
          } else if (peek === '*') {
            // Block comment: runs until the first "*/".
            peek = readChar();
            let lastAsterisk = false;
            for (;; peek = readChar()) {
              if (peek === undefined) {
                console.log("注釋語法錯誤01");
                return null; // syntax error: unterminated comment
              } else if (peek === '\n') {
                lastAsterisk = false;
                line++; // track the current line
              } else if (peek === '*') {
                lastAsterisk = true;
              } else if (lastAsterisk && peek === '/') {
                peek = readChar();
                break; // normal exit
              } else {
                lastAsterisk = false;
              }
            }
          } else {
            // Syntax error: '/' followed by neither '/' nor '*'.
            console.log("注釋語法錯誤02");
            return null;
          }
        }

        // Number: digits with at most one decimal point.
        if (/[0-9.]/.test(peek)) {
          let temp = peek;
          let hasPoint = peek === '.';
          // At end of input readChar() yields undefined, which the regex
          // tests as the string "undefined" and therefore rejects.
          while (/[0-9.]/.test(peek = readChar())) {
            if (peek === '.') {
              if (hasPoint) {
                console.log("語法錯誤3,包含多個小數點");
                return null;
              }
              hasPoint = true;
            }
            temp += peek;
          }
          return {
            tag: 'NUM',
            value: Number(temp),
          };
        }

        // Identifier: a run of ASCII letters. The class must be [a-zA-Z];
        // the range A-z would also accept [ \ ] ^ _ and the backtick.
        if (/[a-zA-Z]/.test(peek)) {
          let temp = peek;
          // The truthiness check is needed because null/undefined are
          // coerced to their literal strings by test() and would otherwise
          // match the letter class.
          while ((peek = readChar()) && /[a-zA-Z]/.test(peek)) {
            temp += peek;
          }
          let w = words.get(temp);
          if (w === undefined) {
            w = {
              tag: 'ID',
              lexeme: temp,
            };
            words.set(temp, w);
          }
          return w;
        }

        // Relational / logical operators.
        if (/[><=!]/.test(peek)) {
          const first = peek;
          peek = readChar();
          if (peek === '=') {
            peek = readChar(); // consume the '=' so it is not handled twice
            return {
              tag: '邏輯運算符',
              value: first + '=',
            };
          } else if (first !== '=') {
            return {
              tag: '邏輯運算符',
              value: first,
            };
          } else {
            // A single '=': step back and emit it as a plain character below.
            undo();
            peek = first;
          }
        }

        // Any other character becomes a token tagged with the character itself.
        const res = {
          tag: peek,
        };
        peek = ' ';
        return res;
      };

      const tokens = [];
      let token;
      while ((token = scan()) !== null) {
        tokens.push(token);
      }
      return tokens;
    }
  </script>
</body>
</html>