詞法分析-----詞法掃描器的設計實現


一、實驗目標

 

從左至右逐個字符地對源程序進行掃描,產生一個個的單詞符號,把作為字符串的源程序改造成為單詞符號串的中間程序。詞法分析器的功能是輸入源程序,輸出單詞符號,並保存token的內容。程序語言的單詞符號分為以下六種:iT標識符、cT字符、sT字符串、CT常數、KT關鍵字、PT界符。

二、實驗內容

2.1概要設計

 

詞法分析器,實現固定語法的識別,就要明白什么是詞法分析器,它的功能是什么。詞法分析是編譯程序進行編譯時第一個要進行的任務,主要是對源程序進行編譯預處理(去除注釋、無用的回車換行、找到包含的文件等)之后,對整個源程序進行分解,分解成一個個單詞,這些單詞有且只有6類,分別是標識符、保留字、常數、字符、字符串、界符。以便為下面的語法分析和語義分析做准備。詞法分析是所有后續工作的基礎,如果這一步出錯,比如明明是‘<=’卻被拆分成‘<’和‘=’,就會對下文造成不可挽回的影響。因此,在進行詞法分析的時候一定要定義好這6種符號的集合。

詞法分析器的設計包括:讀取文件里的內容、有限自動機的設計、將讀取的token序列存入字典之后按照文件里單詞的順序輸出。其中有限自動機的設計為詞法分析器的核心,實現了對預處理之后的文件中的token的識別。有限自動機通過分析當前讀入的字符,跳轉到下一狀態,直到進入終止狀態。並且根據當前token的終止狀態,判斷出token所屬的類型碼,存入相應的符號類型表。

  

 

 

 


  

2.2數據結構

1)Dict:為token序列建立字典,鍵是分割出的單詞,值是單詞所屬的類型碼

2)詞法分析類Scanner:

 

3 Scanner的數據成員

數據成員

countt,  IDentifierTbl[1000][20]

i標識符表和其對應的計數器

countct, SingleChar[1000]

c字符表和其對應的計數器

counts, StringChar[1000][20]

S字符串表和其對應的計數器

countc, ConstantTbl[1000][20]

CT常數表和其對應的計數器

operatorOrDelimiter[36][10]

P界符表

reserveWord[32][20]

K關鍵字表

resourceProject[10000]

輸入源程序的存放處

Token[]

每次分析出來的單詞

 

 

 

4 關鍵函數

成員函數

int searchReserve(char reserve[][20],char s[])

搜索解析出來的單詞在二維字符數組中是否出現

int searchRReserve(char reserve[],char s)

搜索解析出來的單詞在一維字符數組中是否出現

bool IsLetter(char letter)

bool IsDigit(char digit)

判斷當前字符是否是字母(含下划線)或數字,返回布爾型

void filterResource(char r[],int pProject)

過濾掉注釋的部分,得到一個純凈的代碼

void Scanner(int &syn,char resourceProject[],char token[],int &pProject)

生成token序列,同時將序列輸出到文件里

int main()

運行主函數,進行文件數據輸入

 

  標識符表、字符表、字符串表、常數表、界符表、關鍵字表以及存放源程序的字符數組都定義為全局變量,所以不需要初始化函數,變量的調用也方便。

searchRReserve(char reserve[],char s), searchReserve(char reserve[][20],char s[])函數用於避免出現重復,即每識別出一個token,都先到對應類別的數組中搜索一下,已存在則不再重復登記。輸出在Scanner()函數中直接傳輸到文件。

 

2.3 流程圖

 

3 token序列識別流程圖

 

 

三、源程序代碼:(加入注釋)

#include <iostream>

#include<stdio.h>

#include<string.h>

#include<stdlib.h>

 

 

using namespace std;

// Reserved-word table: the 32 keywords the scanner recognises.
// Scanner reports a keyword's class code as its 1-based position in this
// table. All 32 rows are populated, so there is no empty row after "while".

static char reserveWord[32][20] =

{

    "auto", "break", "case", "char", "const", "continue",

    "default", "do", "double", "else", "enum", "extern",

    "float", "for", "goto", "if", "int", "long",

    "register", "return", "short", "signed", "sizeof", "static",

    "struct", "switch", "typedef", "union", "unsigned", "void",

    "volatile", "while"

};

// Operator and delimiter table.
// Scanner hard-codes the 1-based positions of several entries (for example
// 6 for "<=", 26 for "<<" and 36 for "!"), so the order of the entries must
// not change.

static char operatorOrDelimiter[36][10]=

{

    "+","-","*","/","<","<=",">",">=","=","==",

    "!=",";","(",")","^",",","\"","\'","#","&",

    "&&","|","||","%","~","<<",">>","[","]","{",

    "}","\\",".","\?",":","!"

};

static  char IDentifierTbl[1000][20]= {""}; // identifier table (i)

static  char SingleChar[1000]= {""}; // single-character literal table (ct)

static  char StringChar[1000][20]= {""}; // string literal table (S)

static  char ConstantTbl[1000][20]= {""}; // numeric constant table (C)

int countct=0;// number of entries used in SingleChar

int countc=0;// number of entries used in ConstantTbl

int counts=0; // number of entries used in StringChar

int countt=0;// number of entries used in IDentifierTbl

char resourceProject[10000];// holds the input source program; room for at most 10000 characters

 

// Looks up s in a table of 20-byte rows. On success returns the class code,

// which is the 1-based row index; returns -1 when s is not in the table.

int searchReserve(char reserveWord[ ][20], char s[]);

 

bool IsLetter(char letter);

 

bool IsDigit(char digit);

 

void filterResource(char r[],int pProject);

 

void Scanner(int &syn,char resourceProject[],char token[],int &pProject);

 

/*
 * Searches a table of 20-byte rows for the string s.
 *
 * The scan starts at row 0 and stops at the first row whose first byte is
 * '\0', so the table must contain such an empty row after its last entry;
 * nothing else bounds the loop.
 *
 * Returns the 1-based index of the matching row, or -1 when s is not found
 * before the empty row.
 */
int searchReserve(char reserve[][20],char s[])
{
    int row=0;

    while(reserve[row][0]!='\0')
    {
        if(strcmp(reserve[row],s)==0)
        {
            return row+1;
        }
        row++;
    }

    return -1;
}

 

/*
 * Searches the NUL-terminated character array reserve for the character s.
 *
 * Returns the 1-based position of the first occurrence, or -1 when s does
 * not occur before the terminating '\0'.
 */
int searchRReserve(char reserve[],char s)
{
    int pos=0;

    while(reserve[pos]!='\0')
    {
        if(reserve[pos]==s)
        {
            return pos+1;
        }
        pos++;
    }

    return -1;
}

/*
 * Returns true when letter is an ASCII letter ('a'-'z' or 'A'-'Z') or an
 * underscore, i.e. a character that may appear in an identifier.
 */
bool IsLetter(char letter)
{
    const bool isLower=(letter>='a'&&letter<='z');
    const bool isUpper=(letter>='A'&&letter<='Z');

    return isLower||isUpper||letter=='_';
}

 

/*
 * Returns true when digit is one of the ASCII decimal digits '0'-'9'.
 */
bool IsDigit(char digit)
{
    return !(digit<'0'||digit>'9');
}

/*
 * Strips comments and layout characters from the program text in r, in place.
 *
 * r        NUL-terminated source text. The '$' character is treated as the
 *          end-of-program marker.
 * pProject index of the last position to examine; positions 0..pProject are
 *          processed, stopping early at the first '\0'.
 *
 * - A "//" comment is removed up to, but not including, the next newline.
 *   If the comment runs to the end of the text, it is removed up to the '$'
 *   marker, which is kept so the scanner can still find the end.
 * - A block comment is removed entirely. If it is not closed before '$' or
 *   the end of the text, "注釋出錯" is printed and the program exits.
 * - '\n', '\t', '\v' and '\r' are replaced by a single space instead of
 *   being deleted, so tokens separated only by a line break or tab (for
 *   example "int\nx") do not fuse into one token. No space is emitted at the
 *   very start of the output or directly after another space.
 *
 * The output is never longer than the input, so the text is rewritten in the
 * same buffer and no temporary copy is needed.
 */
void filterResource(char r[],int pProject)
{
    int out=0;  /* next write position; always <= the read position i */
    int i=0;

    while(i<=pProject&&r[i]!='\0')
    {
        /* r[i] is not '\0' here, so r[i+1] is at worst the terminator. */
        if(r[i]=='/'&&r[i+1]=='/')
        {
            while(i<=pProject&&r[i]!='\n'&&r[i]!='$'&&r[i]!='\0')
            {
                i++;
            }
            /* Re-examine the stop character: it may be '\n', '$' or the end. */
            continue;
        }

        if(r[i]=='/'&&r[i+1]=='*')
        {
            i+=2;
            while(r[i]!='*'||r[i+1]!='/')
            {
                /* Checked before advancing so the first body character counts too. */
                if(i>pProject||r[i]=='\0'||r[i]=='$')
                {
                    puts("注釋出錯");
                    exit(0);
                }
                i++;
            }
            i+=2;
            /* Loop again so that a comment directly following this one is seen. */
            continue;
        }

        char c=r[i++];
        if(c=='\n'||c=='\t'||c=='\v'||c=='\r')
        {
            if(out>0&&r[out-1]!=' ')
            {
                r[out++]=' ';
            }
            continue;
        }
        r[out++]=c;
    }

    r[out]='\0';
}

 

void Scanner(int &syn,char resourceProject[],char token[],int &pProject)

{

    FILE *fp1;

    if((fp1=fopen("E:\\2017.txt","at"))==NULL)

    {

        cout<<"cam not open";

        exit(0);

    }

 

    int i,count=0;//count用來做token指示器,收集有用字符

    char ch;

    ch=resourceProject[pProject];

    while(ch==' ')

    {

        pProject++;

        ch=resourceProject[pProject];

    }

    for(i=0; i<20; i++)

    {

        token[i]='\0';//收集前先清零

    }

    if(IsLetter(resourceProject[pProject]))

    {

        token[count++]=resourceProject[pProject];

        pProject++;

        while(IsLetter(resourceProject[pProject])||IsDigit(resourceProject[pProject]))

        {

            token[count++]=resourceProject[pProject];

            pProject++;

        }

        token[count]='\0';

        syn=searchReserve(reserveWord,token);

        if(syn!=-1)

        {

            cout<<"{"<<"k"<<","<<syn<<","<<reserveWord[syn-1]<<"}"<<endl;

            fprintf(fp1, "{k   ,   %d   ,%s }\n", syn,reserveWord[syn-1]);

        }

 

        if(syn==-1)

        {

 

            syn=searchReserve(IDentifierTbl,token);

            if(syn==-1)

            {

                strcpy(IDentifierTbl[countt++],token);

                syn=countt;

            }

            cout<<"{"<<"i"<<","<<syn<<","<<IDentifierTbl[syn-1]<<"}"<<endl;

            fprintf(fp1, "{i   ,   %d   ,%s }\n", syn,IDentifierTbl[syn-1]);

        }

    }

    else if(IsDigit(resourceProject[pProject]))

    {

        while(IsDigit(resourceProject[pProject]))

        {

            token[count++]=resourceProject[pProject];

            pProject++;

        }

        token[count]='\0';

        syn=searchReserve(ConstantTbl,token);

        if(syn==-1)

        {

            strcpy(ConstantTbl[countc++],token);

            syn=countc;

        }

        cout<<"{"<<"c"<<","<<syn<<","<<ConstantTbl[syn-1]<<"}"<<endl;

        fprintf(fp1, "{c   ,   %d   ,%s }\n", syn,ConstantTbl[syn-1]);

    }

    else if(resourceProject[pProject]=='\'')

    {

        pProject++;

        if(IsLetter(resourceProject[pProject]))

        {

            pProject++;

            if(resourceProject[pProject]=='\'')

            {

                pProject--;

                syn=searchRReserve(SingleChar,resourceProject[pProject]);

                if(syn==-1)

                {

                    SingleChar[countct++]=resourceProject[pProject];

                    syn=countct;

                }

            }

        }

        cout<<"{"<<"CT"<<","<<syn<<","<<resourceProject[syn-1]<<"}"<<endl;

        fprintf(fp1, "{CT   ,   %d  ,%c }\n", syn,resourceProject[syn-1]);

        pProject+=2;

    }

    else if(resourceProject[pProject]=='"')

    {

        pProject++;

        while(resourceProject[pProject]!='"')

        {

            token[count++]=resourceProject[pProject];

            pProject++;

        }

        token[count]='\0';

        syn=searchReserve(StringChar,token);

        if(syn==-1)

        {

            strcpy(StringChar[counts++],token);

            syn=counts;

        }

        cout<<"{"<<"S"<<","<<syn<<","<<StringChar[syn-1]<<"}"<<endl;

        fprintf(fp1, "{S   ,   %d   ,%s }\n", syn,StringChar[syn-1]);

        pProject++;

    }

    else if (ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == ';' || ch == '(' || ch == ')' || ch == '^'

             || ch == ',' || ch == '\"' || ch == '\'' || ch == '~' || ch == '#' || ch == '%' || ch == '['

             || ch == ']' || ch == '{' || ch == '}' || ch == '\\' || ch == '.' || ch == '\?' || ch == ':')

    {

        //若為運算符或者界符,查表得到結果

        token[0] = resourceProject[pProject];

        token[1] = '\0';//形成單字符串

        for (i = 0; i<36; i++)

        {

            //查運算符界符表

            if (strcmp(token, operatorOrDelimiter[i]) == 0)

            {

                syn = i+1;//獲得種別碼,使用了一點技巧,使之呈線性映射

                break;//查到即推出

            }

        }

        cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;

        fprintf(fp1, "{p   ,   %d   ,%s }\n", syn,operatorOrDelimiter[syn-1]);

        pProject++;//指針下移,為下一掃描做准備

    }

    else if(resourceProject[pProject]=='<')

    {

        //<,<=,<<

        pProject++;

        if(resourceProject[pProject]=='=') syn=6;

        else if(resourceProject[pProject]=='<')

            syn=26;

        else

        {

            pProject--;

            syn=5;

        }

        cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;

        fprintf(fp1, "{p   ,   %d   ,%s }\n", syn,operatorOrDelimiter[syn-1]);

        pProject++;

    }

    else if(resourceProject[pProject]=='>')

    {

        //>,>=,>>

        pProject++;

        if(resourceProject[pProject]=='=') syn=8;

        else if(resourceProject[pProject]=='>') syn=27;

        else

        {

            pProject--;

            syn=7;

        }

        cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;

        fprintf(fp1, "{p   ,   %d   ,%s }\n", syn,operatorOrDelimiter[syn-1]);

        pProject++;

    }

    else if(resourceProject[pProject]=='=')

    {

        //=,==

        pProject++;

        if(resourceProject[pProject]=='=') syn=10;

        else

        {

            pProject--;

            syn=9;

        }

        cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;

        fprintf(fp1, "{p   ,   %d   ,%s }\n", syn,operatorOrDelimiter[syn-1]);

        pProject++;

    }

    else if(resourceProject[pProject]=='!')

    {

        // !,!=

        pProject++;

        if(resourceProject[pProject]=='=') syn=11;

        else

        {

            pProject--;

            syn=36;

        }

       cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;

        fprintf(fp1, "{p   ,   %d   ,%s }\n", syn,operatorOrDelimiter[syn-1]);

        pProject++;

    }

 

    else if(resourceProject[pProject]=='&')

    {

        //&,&&

        pProject++;

        if(resourceProject[pProject]=='&') syn=21;

        else

        {

            pProject--;

            syn=20;

        }

        cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;

        fprintf(fp1, "{p   ,   %d   ,%s }\n", syn,operatorOrDelimiter[syn-1]);

        pProject++;

    }

    else if(resourceProject[pProject]=='|')

    {

        //|,||

        pProject++;

        if(resourceProject[pProject]=='|') syn=23;

        else

        {

            pProject--;

            syn=22;

        }

        cout<<"{"<<"p"<<","<<syn<<","<<operatorOrDelimiter[syn-1]<<"}"<<endl;

        fprintf(fp1, "{p   ,   %d   ,%s }\n", syn,operatorOrDelimiter[syn-1]);

        pProject++;

    }

 

    else if(resourceProject[pProject]=='$')

    {

        //$

        syn=0;

    }

    else

    {

        cout<<"error: no exist "<<resourceProject[pProject]<<endl;

        exit(0);

    }

    fclose(fp1);

}

int main()

{

    char token[20]= {0}; //每次掃描的時候存儲已經掃描的結果。

    int syn=-1;//syn即為種別碼,約定‘$’的種別碼為0,為整個源程序的結束符號一旦掃描到這個字符代表掃描結束

    int pProject = 0;//源程序指針,始終指向當前源程序待掃描位置。

    FILE *fp;

    if((fp=fopen("E:\\2018.txt","r"))==NULL)

    {

        cout<<"cam not open";

        exit(0);

    }

    resourceProject[pProject]=fgetc(fp);

    while(resourceProject[pProject]!='$')

    {

        pProject++;

        resourceProject[pProject]=fgetc(fp);

    }

    resourceProject[++pProject]='\0';

    fclose(fp);

    cout<<endl<<"源程序為"<<endl;

    cout<<resourceProject<<endl;

    //過濾

    filterResource(resourceProject, pProject);

    cout<<endl<<"過濾之后"<<endl;

    cout<<resourceProject<<endl;

    pProject=0;

 

    while(syn!=0)

    {

        Scanner(syn,resourceProject,token,pProject);

    }

 

    return 0;

}

四、程序運行結果:(截屏)

 

 

五:畫蛇添足


 

有什么問題歡迎大家互相學習討論~~


 

 

 

 

 

 

 

 

 

 

 

 

 

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM