PL/0語言詞法分析器

本文轉載自查看原文 2019-03-25 22:37 2293 編譯原理

前言：關於詞法分析的基礎知識的介紹可以看一下這篇博客，我再累述估計也不會有這篇講的清楚QAQ。 https://www.cnblogs.com/yanlingyin/archive/2012/04/17/2451717.html 默認大家已經對詞法分析有了基本的了解了。

一：下面討論PL/0語言的詞法分析器的單詞結構

1、關鍵字

　　關鍵字(共11個)：空格分隔列表如下

　　begin end if then while do const var call procedure odd

2、運算符和界符

　　算符和界符（14個）：空格分隔列表如下

　　+ - * / = # < > := ( ) , . ;

3、標示符

PL/0語言中標示符的定義為：開頭可以是下划線或字母，后面可以是下划線、字母或數字。

4、常數

　　整形數和浮點數

5、空白符

　　PL/0語言中的空白符有：空格符、制表符、換行符，這些空白符在詞法分析階段可以被忽略。

6、注釋

PL/0語言中的注釋形式為//，可以多行注釋（*…*）。

二：PL/0的語言的詞法分析器將要完成以下工作

（1）跳過分隔符（如空格，回車，制表符）；

（2）識別諸如begin，end，if，while等關鍵字；

（3）識別非關鍵字的一般標識符。

（4）識別常數數字序列。

（5）識別前面列出的單字符操作符和:=雙字符特殊符號。

（6）詞法分析器的輸出形式（種別，屬性值）其中：種別在“2、單詞的種別”中進行了定義；

屬性值：若單詞種別只代表唯一單詞，屬性值為空；

若單詞種別是標識符，為該單詞在標識符表中的位置；

若單詞種別是常數，屬性值為對應常數值。

三：代碼實現

測試程序如下（我放置的路徑為D:/b.txt,根據自己情況自行修改）

 1 // PL/0 語法示例程序 
 2 
 3 (*
 4     計算1～10的階乘
 5     多行注釋    
 6 *)
 7 
 8 var n, f;
 9 begin
10      n := 0;
11      f := 1;
12      while n # 10 do
13      begin
14           n := n + 1;
15           f := f * n;
16      end;
17      call print;// 用於輸出結果，假設預先聲明
18 end.

View Code

種別碼如下（我放置的路徑為D:/a.txt,根據自己情況自行修改）

 1 begin  1
 2 end  2
 3 if  3
 4 then 4
 5 while  5
 6 do  6
 7 const  7
 8 var  8
 9 call  9
10 procedure  10
11 odd  11
12 +  12
13 -  13
14 *  14
15 /  15
16 =  16
17 #  17
18 <  18
19 >  19
20 :=  20
21 (  21
22 )  22
23 ,  23
24 .  24
25 ;  25
26 (*多行注釋*)    26
27 //單行注釋  27
28 常數    28
29 標識符    29

View Code

實現代碼如下

  1 // pL/0語言詞法分析器
  2 #include<bits/stdc++.h>
  3 using namespace std;
  4 struct _2tup
  5 {
  6     string token;
  7     int id;
  8 };
  9 bool is_blank(char ch)
 10 {
 11     return ch == ' ' || ch == '    ';//空格或控制字符
 12 }
 13 bool gofor(char& ch, string::size_type& pos, const string& prog)//返回指定位置的字符
 14 {
 15     ++pos;
 16     if (pos >= prog.size())
 17     {
 18         return false;
 19     }
 20     else
 21     {
 22         ch = prog[pos];
 23         return true;
 24     }
 25 }
 26 
 27 _2tup scanner(const string& prog, string::size_type& pos, const map<string, int>& keys, int& row)
 28 {
 29     /*
 30     if
 31         標示符
 32     else if
 33         數字
 34     else
 35         符號
 36     */
 37     _2tup ret;
 38     string token;
 39     int id = 0;
 40 
 41     char ch;
 42     ch = prog[pos];
 43 
 44     while(is_blank(ch))
 45     {
 46         ++pos;
 47         ch = prog[pos];
 48     }
 49     // 判斷標示符、關鍵字
 50     if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')
 51     {
 52         //保證讀取一個單詞
 53         while((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')
 54         {
 55             token += ch;//追加標示符、關鍵字
 56             if (!gofor(ch, pos, prog))
 57             {
 58                 break;
 59             }
 60         }
 61         // 這里先看做都是其他標示符
 62         id = keys.size();
 63 
 64         // 驗證是否是關鍵字
 65         map<string, int>::const_iterator cit = keys.find(token);//根據string類型的token返回int類型的id賦值給cit
 66         if (cit != keys.end())
 67         {
 68             id = cit->second;//此時是關鍵字，記錄他的id
 69         }
 70     }
 71     // 識別常數
 72     else if ((ch >= '0' && ch <= '9') || ch == '.')
 73     {
 74         while (ch >= '0' && ch <= '9' || ch == '.')
 75         {
 76             token += ch;
 77             if (!gofor(ch, pos, prog))
 78             {
 79                 break;
 80             }
 81         }
 82         id = keys.size() - 1;
 83         int dot_num = 0;
 84         for (string::size_type i = 0; i != token.size(); ++i)
 85         {
 86             if (token[i] == '.')
 87             {
 88                 ++dot_num;
 89             }
 90         }
 91         if (dot_num > 1)
 92         {
 93             id = -1;
 94         }
 95     }
 96     else
 97     {
 98         map<string, int>::const_iterator cit;
 99         switch (ch)
100         {
101         case '-': // - 操作符
102             token += ch;
103             if (gofor(ch, pos, prog))
104             {
105                 if (ch == '-' || ch == '=' || ch == '>') // -- 操作符
106                 {
107                     token += ch;
108                     gofor(ch, pos, prog);
109                 }
110             }
111             cit = keys.find(token);
112             if (cit != keys.end())
113             {
114                 id = cit->second;
115             }
116             break;
117         case ':':
118              token += ch;
119             if (gofor(ch, pos, prog))
120             {
121                 if (ch == '=') // -- 操作符
122                 {
123                     token += ch;
124                     gofor(ch, pos, prog);
125                 }
126             }
127             cit = keys.find(token);
128             if (cit != keys.end())
129             {
130                 id = cit->second;
131             }
132             break;
133 
134         case '=':
135             token += ch;
136             if (gofor(ch, pos, prog))
137             {
138                 if (ch == '=') // !% %= 操作符
139                 {
140                     token += ch;
141                     gofor(ch, pos, prog);
142                 }
143             }
144             cit = keys.find(token);
145             if (cit != keys.end())
146             {
147                 id = cit->second;
148             }
149             break;
150 
151         case '/': // / 操作符
152             token += ch;
153             if (gofor(ch, pos, prog))
154             {
155                 if (ch == '=') // /= 操作符
156                 {
157                     token += ch;
158                     gofor(ch, pos, prog);
159                 }
160                 else if (ch == '/') // 單行注釋
161                 {
162                     token += ch;
163                     ++pos;
164                     while (pos < prog.size())
165                     {
166                         ch = prog[pos];
167                         if (ch == '\n')
168                         {
169                             break;
170                         }
171                         token += ch;
172                         ++pos;
173                     }
174                     if (pos >= prog.size())
175                     {
176                         ;
177                     }
178                     else
179                     {
180                         ;
181                     }
182                     id = keys.size() - 2;
183                     break;
184                 }
185                 else if (ch == '*') // 注釋
186                 {
187                     token += ch;
188                     if (!gofor(ch, pos, prog))
189                     {
190                         token += "\n!!!注釋錯誤!!!";
191                         id = -10;
192                         break;
193                     }
194                     if (pos + 1 >= prog.size())
195                     {
196                         token += ch;
197                         token += "\n!!!注釋錯誤!!!";
198                         id = -10;
199                         break;
200                     }
201                     char xh = prog[pos + 1];
202                     while (ch != '*' || xh != '/')
203                     {
204                         token += ch;
205                         if (ch == '\n')
206                         {
207                             ++row;
208                         }
209                         //++pos;
210                         if (!gofor(ch, pos, prog))
211                         {
212                             token += "\n!!!注釋錯誤!!!";
213                             id = -10;
214                             ret.token = token;
215                             ret.id    = id;
216                             return ret;
217                         }
218                         //ch = prog[pos];
219                         if (pos + 1 >= prog.size())
220                         {
221                             token += ch;
222                             token += "\n!!!注釋錯誤!!!";
223                             id = -10;
224                             ret.token = token;
225                             ret.id    = id;
226                             return ret;
227                         }
228                         xh = prog[pos + 1];
229                     }
230                     token += ch;
231                     token += xh;
232                     pos += 2;
233                     ch = prog[pos];
234                     id = keys.size() - 2;
235                     break;
236                 }
237             }
238             cit = keys.find(token);
239             if (cit != keys.end())
240             {
241                 id = cit->second;
242             }
243             break;
244         case '+':
245             token += ch;
246             cit = keys.find(token);
247             if (cit != keys.end())
248             {
249                 id = cit->second;
250             }
251             gofor(ch, pos, prog);
252             break;
253 
254         case '<':
255             token += ch;
256             if (gofor(ch, pos, prog))
257             {
258                 if (ch == '<')
259                 {
260                     token += ch;
261                     if (gofor(ch, pos, prog))
262                     {
263                         if (ch == '=')
264                         {
265                             token += ch;
266                             gofor(ch, pos, prog);
267                         }
268                     }
269                 }
270                 else if (ch == '=')
271                 {
272                     token += ch;
273                     gofor(ch, pos, prog);
274                 }
275             }
276             cit = keys.find(token);
277             if (cit != keys.end())
278             {
279                 id = cit->second;
280             }
281             break;
282 
283         case '>':
284             token += ch;
285             if (gofor(ch, pos, prog))
286             {
287                 if (ch == '>')
288                 {
289                     token += ch;
290                     if (gofor(ch, pos, prog))
291                     {
292                         if (ch == '=')
293                         {
294                             token += ch;
295                             gofor(ch, pos, prog);
296                         }
297                     }
298                 }
299                 else if (ch == '=')
300                 {
301                     token += ch;
302                     gofor(ch, pos, prog);
303                 }
304             }
305             cit = keys.find(token);
306             if (cit != keys.end())
307             {
308                 id = cit->second;
309             }
310             break;
311          case '(': // / 操作符
312             token += ch;
313             if (gofor(ch, pos, prog))
314 
315             {
316                  if (ch == '*') // 注釋
317                 {
318                     token += ch;
319                     if (!gofor(ch, pos, prog))
320                     {
321                         token += "\n!!!注釋錯誤!!!";
322                         id = -10;
323                         break;
324                     }
325                     if (pos + 1 >= prog.size())
326                     {
327                         token += ch;
328                         token += "\n!!!注釋錯誤!!!";
329                         id = -10;
330                         break;
331                     }
332                     char xh = prog[pos + 1];
333                     while (ch != '*' || xh != ')')
334                     {
335                         token += ch;
336                         if (ch == '\n')
337                         {
338                             ++row;
339                         }
340                         //++pos;
341                         if (!gofor(ch, pos, prog))
342                         {
343                             token += "\n!!!注釋錯誤!!!";
344                             id = -10;
345                             ret.token = token;
346                             ret.id    = id;
347                             return ret;
348                         }
349                         //ch = prog[pos];
350                         if (pos + 1 >= prog.size())
351                         {
352                             token += ch;
353                             token += "\n!!!注釋錯誤!!!";
354                             id = -10;
355                             ret.token = token;
356                             ret.id    = id;
357                             return ret;
358                         }
359                         xh = prog[pos + 1];
360                     }
361                     token += ch;
362                     token += xh;
363                     pos += 2;
364                     ch = prog[pos];
365                     id = keys.size() - 2;
366                     break;
367                 }
368             }
369             cit = keys.find(token);
370             if (cit != keys.end())
371             {
372                 id = cit->second;
373             }
374             break;
375 
376         case '*':
377             token += ch;
378             cit = keys.find(token);
379             if (cit != keys.end())
380             {
381                 id = cit->second;
382             }
383              gofor(ch, pos, prog);
384             break;
385 
386         case ',':
387         case '#':
388         case '.':
389         case ';':
390             token += ch;
391             gofor(ch, pos, prog);
392             //++pos;
393             //ch = prog[pos];
394             cit = keys.find(token);
395             if (cit != keys.end())
396             {
397                 id = cit->second;
398             }
399             break;
400 
401         case '\n':
402             token += "換行";
403             ++pos;
404             ch = prog[pos];
405             id = -2;
406             break;
407         default:
408             token += "錯誤";
409             ++pos;
410             ch = prog[pos];
411             id = -1;
412             break;
413         }
414     }
415     ret.token = token;
416     ret.id    = id;
417 
418     return ret;
419 }
420 
421 void init_keys(const string& file, map<string, int>& keys)//讀取單詞符號和種別碼
422 {
423     ifstream fin(file.c_str());//.c_str返回的是當前字符串的首地址
424     if (!fin)
425     {
426         cerr << file << " doesn't exist!" << endl;//cerr不經過緩沖而直接輸出,一般用於迅速輸出出錯信息
427       //  exit(1);
428     }
429     keys.clear();//清空map對象里面的內容
430     string line;
431     string key;
432     int id;
433     while (getline(fin, line))//這個函數接收兩個參數：一個輸入流對象和一個string對象，getline函數從輸入流的下一行讀取，並保存讀取的內容到string中
434     {
435         istringstream sin(line);//istringstream sin(s);定義一個字符串輸入流的對象sin,並調用sin的復制構造函數，將line中所包含的字符串放入sin 對象中！
436         sin >> key >> id;//讀取里面的字符串每一行一個key id
437         keys[key] = id;
438     }
439 }
440 
441 void read_prog(const string& file, string& prog){//讀取代碼，並追加到prog上
442     ifstream fin(file.c_str());
443     if (!fin)
444     {
445         cerr << file << " error!" << endl;
446       //  exit(2);
447     }
448     prog.clear();
449     string line;
450     while (getline(fin, line))
451     {
452         prog += line + '\n';
453     }
454 }
455 int main()
456 {
457     map<string, int> keys;
458     init_keys("D:/Test/a.txt", keys);
459 
460     string prog;
461     read_prog("D:/Test/b.txt", prog);
462 
463     vector< _2tup > tups;
464     string token, id;
465 
466     string::size_type pos = 0;//size_type屬於string標准庫，作用可看做相當於unsigned·int
467     int row  = 1;
468 
469     _2tup tu;
470     cout << "------------------" << "要分析的代碼如下"<< "---------------" << endl;
471     cout << prog << endl << endl;
472 
473     // prog += "#"; // 標識終止，其實可以檢測 pos 來判別是否終止
474 
475     int no = 0;
476  cout << "------------------" << "分析的結果如下如下"<< "---------------" << endl;
477     do
478     {
479         tu = scanner(prog, pos, keys, row);
480 
481         switch (tu.id)
482         {
483         case -1://返回的是錯誤
484             ++no;
485             cout << no << ": ";
486             cout << "Error in row" << row << "!" << '<' << tu.token<< "," << tu.id << '>' << endl;
487             tups.push_back(tu);
488             break;
489         case -2:
490             ++row;
491             // cout << '<' << tu.token<< "," << tu.id << '>' << endl;
492             break;
493         default:
494             ++no;
495             cout << no << ": ";
496             if(tu.id==28 || tu.id==29){
497                 cout << '<' << tu.id<< "," << tu.token << '>' << endl;
498             }
499             else{
500                 cout << '<' << tu.id<< "," << "-" << '>' << endl;
501             }
502             tups.push_back(tu);
503             break;
504         }
505     } while (pos < prog.size());
506 
507     cout << endl << "--------------------------------結果總行數------------"<<tups.size() << endl;
508     return 0;
509 }

View Code

四：運行截圖

看懂代碼之后，便可以很容易擴展寫出其他語言的詞法分析器。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Java語言詞法分析器編譯原理解釋器（一）C語言詞法分析器的實現 Python版C語言詞法分析器 Java語言的詞法分析器的Java實現詞法分析器設計詞法分析器詞法分析器的實現詞法分析器的作用 sizzle分析記錄：詞法分析器(tokenize) java詞法分析器簡單實現