一、實驗目標
從左至右逐個字符地對源程序進行掃描,產生一個個的單詞符號,把作為字符串的源程序改造成為單詞符號串的中間程序。詞法分析器的功能是輸入源程序,輸出單詞符號,並保存token的內容。程序語言的單詞符號分為以下六種:iT標識符、cT字符、sT字符串、CT常數、KT關鍵字、PT界符。
二、實驗內容
2.1概要設計
要設計詞法分析器、實現對固定詞法的識別,首先要明白什么是詞法分析器,它的功能是什么。詞法分析是編譯程序進行編譯時第一個要進行的任務,主要是對源程序進行編譯預處理(去除注釋、無用的回車換行,找到包含的文件等)之后,對整個源程序進行分解,分解成一個個單詞,這些單詞有且只有6類,分別是標識符、保留字、常數、字符、字符串、界符,以便為下面的語法分析和語義分析做准備。詞法分析是所有后續工作的基礎,如果這一步出錯,比如明明是‘<=’卻被拆分成‘<’和‘=’,就會對下文造成不可挽回的影響。因此,在進行詞法分析的時候一定要定義好這6種符號的集合。
詞法分析器的設計包括:讀取文件里的內容、有限自動機的設計、將讀取的token序列存入字典之后按照文件里單詞的順序輸出。其中有限自動機的設計為詞法分析器的核心,實現了對預處理之后的文件中的token的識別。有限自動機通過分析當前讀入的字符,跳轉到下一狀態,直到進入終止狀態。並且根據當前token的終止狀態,判斷出token所屬的類型碼,存入相應的符號類型表。
2.2數據結構
(1)Dict為token序列建立字典,鍵是分割出的單詞,值是單詞所屬的類型碼
(2)詞法分析類Scanner:
表 3 Scanner的數據成員
數據成員 |
|
countt, IDentifierTbl[1000][20] |
i標識符表和其對應的計數器 |
countct, SingleChar[1000] |
c字符表和其對應的計數器 |
counts, StringChar[1000][20] |
S字符串表和其對應的計數器 |
countc, ConstantTbl[1000][20] |
CT常數表和其對應的計數器 |
operatorOrDelimiter[36][10] |
P界符表 |
reserveWord[32][20] |
K關鍵字表 |
resourceProject[10000] |
輸入源程序的存放處 |
Token[] |
每次分析出來的單詞 |
|
|
表 4 關鍵函數
成員函數 |
|
int searchReserve(char reserve[][20],char s[]) |
搜索解析出來的單詞在二維字符數組中是否出現 |
int searchRReserve(char reserve[],char s) |
搜索解析出來的單詞在一維字符數組中是否出現 |
bool IsLetter(char letter) bool IsDigit(char digit) |
分別判斷當前字符是否為字母(含下划線)、是否為數字,返回布爾型 |
void filterResource(char r[],int pProject) |
過濾掉注釋的部分,得到一個純凈的代碼 |
void Scanner(int &syn,char resourceProject[],char token[],int &pProject) |
生成token序列,同時將序列輸出到文件里 |
int main() |
運行主函數,進行文件數據輸入 |
標識符表、字符表、字符串表、常數表、界符表、關鍵字表以及存放源程序的字符數組均定義為全局變量,因此不需要額外的初始化函數,變量的調用也方便;
searchRReserve(char reserve[],char s), searchReserve(char reserve[][20],char s[])函數用於避免出現重復,即每識別出一個token,都先到對應類別的表中搜索一次,已存在則不再重復登記。輸出則在Scanner()函數中直接寫入文件。
2.3 流程圖
圖 3 token序列識別流程圖
三、源程序代碼:(加入注釋)
#include <iostream>
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
using namespace std;
// Reserved-word (keyword) table. A keyword's category code is its 1-based
// position in this table.
//
// searchReserve() walks a table until it meets a row whose first character is
// '\0'. The array is therefore declared one row larger than the 32 keywords:
// the extra, zero-initialised row is the terminator that stops the scan when
// the looked-up word is not a keyword.
static char reserveWord[33][20] =
{
    "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto", "if", "int", "long",
    "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void",
    "volatile", "while"
};
// Operator / delimiter table. The category code of an entry is its 1-based
// position, so the order of the entries must not change.
static char operatorOrDelimiter[36][10]=
{
    /*  1- 4 */ "+",  "-",  "*",  "/",
    /*  5- 8 */ "<",  "<=", ">",  ">=",
    /*  9-12 */ "=",  "==", "!=", ";",
    /* 13-16 */ "(",  ")",  "^",  ",",
    /* 17-20 */ "\"", "\'", "#",  "&",
    /* 21-24 */ "&&", "|",  "||", "%",
    /* 25-28 */ "~",  "<<", ">>", "[",
    /* 29-32 */ "]",  "{",  "}",  "\\",
    /* 33-36 */ ".",  "\?", ":",  "!"
};
// Symbol tables filled in while scanning. Each table is listed together with
// the counter that holds the number of entries stored in it so far.
// Everything starts out zero-filled / zero.

// Identifier table (i).
static char IDentifierTbl[1000][20] = {{0}};
int countt = 0;

// Single-character literal table (ct).
static char SingleChar[1000] = {0};
int countct = 0;

// String literal table (S).
static char StringChar[1000][20] = {{0}};
int counts = 0;

// Numeric constant table (C).
static char ConstantTbl[1000][20] = {{0}};
int countc = 0;

// Buffer holding the source program being analysed; room for 10000 chars.
char resourceProject[10000];
// Forward declarations of the scanner's helper functions.

// Looks up a word in a table of 20-byte rows. Returns the category code
// (1-based row number) on success, or -1 when the word is not found; for the
// reserved-word table a result of -1 means the word is an identifier.
int searchReserve(char reserveWord[ ][20], char s[]);
// True for ASCII letters and the underscore.
bool IsLetter(char letter);
// True for the decimal digits '0'..'9'.
bool IsDigit(char digit);
// Strips comments and line-break/tab characters from the source buffer r.
void filterResource(char r[],int pProject);
// Recognises the next token starting at resourceProject[pProject], stores its
// category code in syn and advances pProject past the token.
void Scanner(int &syn,char resourceProject[],char token[],int &pProject);
// Searches a table of 20-byte rows for the string s.
//
// The table is scanned from the first row up to (not including) the first row
// that starts with '\0'; that empty row acts as the end marker, so the caller
// must make sure one exists.
//
// Returns the 1-based row number of the first match, or -1 if s is not found.
int searchReserve(char reserve[][20],char s[])
{
    for(char (*row)[20]=reserve; (*row)[0]!='\0'; ++row)
    {
        if(strcmp(*row,s)!=0)
            continue;
        return static_cast<int>(row-reserve)+1;
    }
    return -1;
}
// Searches the NUL-terminated character table reserve for the character s.
//
// Returns the 1-based position of the first occurrence, or -1 if s does not
// occur before the terminating '\0'. The terminator itself never matches, so
// searching for '\0' yields -1.
int searchRReserve(char reserve[],char s)
{
    if(s=='\0')
        return -1;   // strchr would match the terminator; the table scan must not
    const char *hit=strchr(reserve,s);
    if(hit==NULL)
        return -1;
    return static_cast<int>(hit-reserve)+1;
}
// Returns true when letter may appear in an identifier as a non-digit:
// a lower-case ASCII letter, an upper-case ASCII letter, or an underscore.
bool IsLetter(char letter)
{
    const bool lowerCase=('a'<=letter)&&(letter<='z');
    const bool upperCase=('A'<=letter)&&(letter<='Z');
    return lowerCase||upperCase||(letter=='_');
}
// Returns true when digit is one of the decimal digits '0' through '9'.
bool IsDigit(char digit)
{
    return !(digit<'0'||digit>'9');
}
void filterResource(char r[],int pProject)
{
char tempString[10000];
int count=0;
for(int i=0; i<=pProject; i++)
{
if(r[i]=='/'&&r[i+1]=='/')
while(r[i]!='\n')
i++;
if(r[i]=='/'&&r[i+1]=='*')
{
i+=2;
while(r[i]!='*'||r[i+1]!='/')
{
i++;
if(r[i]=='$')
{
cout<<"注釋出錯"<<endl;
exit(0);
}
}
i+=2;
}
if(r[i]!='\n'&&r[i]!='\t'&&r[i]!='\v'&&r[i]!='\r')
{
tempString[count++]=r[i];
}
}
tempString[count]='\0';
strcpy(r,tempString);
}
void Scanner(int &syn,char resourceProject[],char token[],int &pProject)
{
FILE *fp1;
if((fp1=fopen("E:\\2017.txt","at"))==NULL)
{
cout<<"cam not open";
exit(0);
}
int i,count=0;//count用來做token指示器,收集有用字符
char ch;
ch=resourceProject[pProject];
while(ch==' ')
{
pProject++;
ch=resourceProject[pProject];
}
for(i=0; i<20; i++)
{
token[i]='\0';//收集前先清零
}
if(IsLetter(resourceProject[pProject]))
{
token[count++]=resourceProject[pProject];
pProject++;
while(IsLetter(resourceProject[pProject])||IsDigit(resourceProject[pProject]))
{
token[count++]=resourceProject[pProject];
pProject++;
}
token[count]='\0';
syn=searchReserve(reserveWord,token);
if(syn!=-1)
{
cout<<"{"<<"k"<<","<<syn<<","<<reserveWord[syn-1]<<"}"<<endl;
fprintf(fp1, "{k , %d ,%s }\n", syn,reserveWord[syn-1]);
}
if(syn==-1)
{
syn=searchReserve(IDentifierTbl,token);
if(syn==-1)
{
strcpy(IDentifierTbl[countt++],token);
syn=countt;
}
cout<<"{"<<"i"<<","<<syn<<","<<IDentifierTbl[syn-1]<<"}"<<endl;
fprintf(fp1, "{i , %d ,%s }\n", syn,IDentifierTbl[syn-1]);
}
}
else if(IsDigit(resourceProject[pProject]))
{
while(IsDigit(resourceProject[pProject]))
{
token[count++]=resourceProject[pProject];
pProject++;
}
token[count]='\0';
syn=searchReserve(ConstantTbl,token);
if(syn==-1)
{
strcpy(ConstantTbl[countc++],token);
syn=countc;
}
cout<<"{"<<"c"<<","<<syn<<","<<ConstantTbl[syn-1]<<"}"<<endl;
fprintf(fp1, "{c , %d ,%s }\n", syn,ConstantTbl[syn-1]);
}
else if(resourceProject[pProject]=='\'')
{
pProject++;
if(IsLetter(resourceProject[pProject]))
{
pProject++;
if(resourceProject[pProject]=='\'')
{
pProject--;
syn=searchRReserve(SingleChar,resourceProject[pProject]);
if(syn==-1)
{
SingleChar[countct++]=resourceProject[pProject];
syn=countct;
}
}
}
cout<<"{"<<"CT"<<","<<syn<<","<<resourceProject[syn-1]<<"}"<<endl;
fprintf(fp1, "{CT , %d ,%c }\n", syn,resourceProject[syn-1]);
pProject+=2;
}
else if(resourceProject[pProject]=='"')
{
pProject++;
while(resourceProject[pProject]!='"')
{
token[count++]=resourceProject[pProject];
pProject++;
}
token[count]='\0';
syn=searchReserve(StringChar,token);
if(syn==-1)
{
strcpy(StringChar[counts++],token);
syn=counts;
}
cout<<"{"<<"S"<<","<<syn<<","<<StringChar[syn-1]<<"}"<<endl;
fprintf(fp1, "{S , %d ,%s }\n", syn,StringChar[syn-1]);
pProject++;
}
else if (ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == ';' || ch == '(' || ch == ')' || ch == '^'
|| ch == ',' || ch == '\"' || ch == '\'' || ch == '~' || ch == '#' || ch == '%' || ch == '['
|| ch == ']' || ch == '{' || ch == '}' || ch == '\\' || ch == '.' || ch == '\?' || ch == ':')
{
//若為運算符或者界符,查表得到結果
token[0] = resourceProject[pProject];
token[1] = '\0';//形成單字符串
for (i = 0; i<36; i++)
{
//查運算符界符表
if (strcmp(token, operatorOrDelimiter[i]) == 0)
{
syn = i+1;//獲得種別碼,使用了一點技巧,使之呈線性映射
break;//查到即推出
}
}
cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;
fprintf(fp1, "{p , %d ,%s }\n", syn,operatorOrDelimiter[syn-1]);
pProject++;//指針下移,為下一掃描做准備
}
else if(resourceProject[pProject]=='<')
{
//<,<=,<<
pProject++;
if(resourceProject[pProject]=='=') syn=6;
else if(resourceProject[pProject]=='<')
syn=26;
else
{
pProject--;
syn=5;
}
cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;
fprintf(fp1, "{p , %d ,%s }\n", syn,operatorOrDelimiter[syn-1]);
pProject++;
}
else if(resourceProject[pProject]=='>')
{
//>,>=,>>
pProject++;
if(resourceProject[pProject]=='=') syn=8;
else if(resourceProject[pProject]=='>') syn=27;
else
{
pProject--;
syn=7;
}
cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;
fprintf(fp1, "{p , %d ,%s }\n", syn,operatorOrDelimiter[syn-1]);
pProject++;
}
else if(resourceProject[pProject]=='=')
{
//=,==
pProject++;
if(resourceProject[pProject]=='=') syn=10;
else
{
pProject--;
syn=9;
}
cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;
fprintf(fp1, "{p , %d ,%s }\n", syn,operatorOrDelimiter[syn-1]);
pProject++;
}
else if(resourceProject[pProject]=='!')
{
// !,!=
pProject++;
if(resourceProject[pProject]=='=') syn=11;
else
{
pProject--;
syn=36;
}
cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;
fprintf(fp1, "{p , %d ,%s }\n", syn,operatorOrDelimiter[syn-1]);
pProject++;
}
else if(resourceProject[pProject]=='&')
{
//&,&&
pProject++;
if(resourceProject[pProject]=='&') syn=21;
else
{
pProject--;
syn=20;
}
cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;
fprintf(fp1, "{p , %d ,%s }\n", syn,operatorOrDelimiter[syn-1]);
pProject++;
}
else if(resourceProject[pProject]=='|')
{
//|,||
pProject++;
if(resourceProject[pProject]=='|') syn=23;
else
{
pProject--;
syn=22;
}
cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;
fprintf(fp1, "{p , %d ,%s }\n", syn,operatorOrDelimiter[syn-1]);
pProject++;
}
else if(resourceProject[pProject]=='$')
{
//$
syn=0;
}
else
{
cout<<"error: no exist "<<resourceProject[pProject]<<endl;
exit(0);
}
fclose(fp1);
}
// Program entry point.
//
// Reads the source program from E:\2018.txt up to the first '$' (or the end
// of the file when there is none), prints it, filters out comments and line
// breaks, prints the filtered text and then calls Scanner() repeatedly until
// it reports the '$' end marker (category code 0).
//
// Returns 0 on success. Terminates with a failure status when the input file
// cannot be opened or does not fit into the source buffer.
int main()
{
    char token[20]= {0};   // lexeme of the token most recently scanned
    int syn=-1;            // category code; 0 is reserved for the '$' end marker
    int pProject=0;        // index of the next character of the source program

    FILE *fp=fopen("E:\\2018.txt","r");
    if(fp==NULL)
    {
        std::cout<<"cam not open";
        exit(EXIT_FAILURE);
    }

    // Two bytes are reserved for the '$' end marker and the terminating '\0'.
    const int capacity=static_cast<int>(sizeof(resourceProject));
    // fgetc returns an int so that EOF can be told apart from real characters.
    int c=fgetc(fp);
    while(c!=EOF&&c!='$')
    {
        if(pProject>=capacity-2)
        {
            std::cout<<"error: source program too long"<<std::endl;
            fclose(fp);
            exit(EXIT_FAILURE);
        }
        resourceProject[pProject++]=static_cast<char>(c);
        c=fgetc(fp);
    }
    fclose(fp);

    // Always terminate with '$' so the scanner stops even when the file
    // itself did not contain the end marker.
    resourceProject[pProject]='$';
    resourceProject[++pProject]='\0';

    std::cout<<std::endl<<"源程序為"<<std::endl;
    std::cout<<resourceProject<<std::endl;

    // Filter out comments and line breaks; pProject is the index of the '\0'.
    filterResource(resourceProject, pProject);
    std::cout<<std::endl<<"過濾之后"<<std::endl;
    std::cout<<resourceProject<<std::endl;

    // Scan token by token from the start until the end marker is reported.
    pProject=0;
    while(syn!=0)
    {
        Scanner(syn,resourceProject,token,pProject);
    }
    return 0;
}
四、程序運行結果:(截屏)
五:畫蛇添足
有什么問題歡迎大家互相學習討論~~