編譯原理-詞法分析器(DFA,C語言描述,可分析C/C++詞法)


 

 

 

 

 

單詞”分類說明 

標識符(Identifier):變量名和函數名(字母或下划線開頭);

關鍵字(Keyword):系統保留字;

運算符(Operator): + - * / % = == != < <= > >= 等;

分隔符(Separator): , ; . ' " ( ) [ ] { } // /* */ #等;

常量(Constant): 字符串或字符常量;

注釋(Note): 注釋不參與編譯。

 

 

 

程序清單:

/*

     ========================================================

                        詞法分析器V1.0

         (DFA,C語言描述,可分析C/C++詞法)

                       Author:Estrong

                          2012.05.10 

    ========================================================

*/

 

/*

說明:

    標識符(Identifier):變量名和函數名(字母或下划線開頭);

    關鍵字(Keyword):具有特定的功能系統保留字,不能移作他用;

    運算符(Operator):   + - * / % = == != < <= > >= 等;

    分隔符(Separator):  , ; . ' " ( ) [ ] { } // /米 米/ #等;

    常量(Constant):字符串或字符常量;

    注釋(Note): 注釋不參與編譯。

*/

#include "stdio.h"

#include "string.h"

 

#define MAX_STR 256

#define MAX_KEYWORDS 62

#define MAX_OPERATORS 12

#define MAX_SEPARATORS 15

 

/* 定義分析狀態 */

#define STA_START 1

#define STA_IDorKEYWORD 2 /*   IDENTIFIER: 標識符   */

#define STA_NUMBER 3      /*   NUMBER:     數字     */

#define STA_NOTE 4        /*   NOTE:        注釋     */

#define STA_CONSTANT 5    /*   CONSTANT:    常量     */

#define STA_DONE  6       /*   DONE:       完成     */

 

/* 定義所屬類型 */

#define TYPE_KEYWORD 1    /*   KEYWORD:    保留字   */

#define TYPE_IDENTIFIER 2 /*   IDENTIFIER: 標識符   */

#define TYPE_NUMBER 3     /*   NUMBER:     數字     */

#define TYPE_NOTE 4       /*   NOTE:        注釋     */

#define TYPE_CONSTANT 5   /*   CONSTANT:    常量     */

#define TYPE_OPERATOR 6   /*   OPERATOR:    運算符   */

#define TYPE_SEPARATOR 7  /*   SEPARATOR:   分隔符   */

#define TYPE_ERROR 8      /*   ERROR:       錯誤     */

#define TYPE_UNKNOWN 9    /*   UNKNOWN:     未知     */

#define TYPE_ENDFILE 10   /*   ENDFILE:     文件結束 */

 

char *Operators[MAX_OPERATORS] = {"+","-","*","/","%","=","==","!=","<","<=",">",">="};

char *Separators[MAX_SEPARATORS] ={",",";",".","\'","\"","(",")","[","]","{","}","//","/*","*/","#"};

char *Keywords[MAX_KEYWORDS] = {"include","define","auto","bool","break","case","catch","char","class",

                                "const","const_cast","continue","default","delete","do","double",

                                "dynamic_cast","else","enum","explicit","extern","false","float","for",

                                "friend","goto","if","inline","int","long","mutable","namespace","new",

                                "operator","private","protected","public","register","reinterpret_cast",

                                "return","short","signed","sizeof","static","static_cast","struct",

                                "switch","template","this","throw","true","try","typedef","typeid",

                                "typename","union","unsigned","using","virtual","void","volatile","while"};

 

/* 是否為運算符 */

int IsOperator(char c)

{

    int i;

    for(i=0;i<MAX_OPERATORS;i++)

        if(Operators[i][0]==c)

            return 1;

    return 0;

}

 

/* 是否為分隔符 */

int IsSeparator(char c)

{

    int i;

    for(i=0;i<MAX_SEPARATORS;i++)

        if(Separators[i][0]==c)

            return 1;

    return 0;

}

 

/* 是否為保留字 */

int IsKeyword(char *str)

{

    int i;

    for(i=0;i<MAX_KEYWORDS;i++)

        if(strcmp(Keywords[i],str)==0)

            return 1;

    return 0;

}

 

/* DONE一次輸出一次 */

void OutputOneDone(FILE *outf,int type,char *str)

{

    if(IsKeyword(str)==1) type=TYPE_KEYWORD;

    switch(type)

    {

        case TYPE_KEYWORD:    fprintf(outf,"       KEYWORD:     ");break;

        case TYPE_IDENTIFIER: fprintf(outf,"       IDENTIFIER:  ");break;

        case TYPE_NUMBER:     fprintf(outf,"       NUMBER:      ");break;

        case TYPE_NOTE:       fprintf(outf,"       NOTE:        ");break;

        case TYPE_CONSTANT:   fprintf(outf,"       CONSTANT:    ");break;

        case TYPE_OPERATOR:   fprintf(outf,"       OPERATOR:    ");break;

        case TYPE_SEPARATOR:  fprintf(outf,"       SEPARATOR:   ");break;

        case TYPE_ERROR:      fprintf(outf,"       ERROR:       ");break;

        case TYPE_UNKNOWN:    fprintf(outf,"       UNKNOWN:     ");break;

        default:break;

    }

    fprintf(outf,"%s\n",str);

}

 

/* DFA詞法分析函數 */

void LexAnalyse(FILE *inf,FILE *outf)

{

   char c;

   char str[MAX_STR];/* 過程字符串 */

   int i;

   int line_no=1;/* 行號 */

   int state,type;

   char flag_limit_one_line;/* 標志為 / * 注釋 范圍是一行 */

   char flag_had_got_dot;/* 用於限定小數中只能有一個小數點 */

   fprintf(outf,"Line %d--------------------------------------\n",line_no);

   while(!feof(inf))

   {

        i=0;

        state=STA_START;

        flag_limit_one_line=0;

        flag_had_got_dot=0;

        while(state!=STA_DONE)

        {

            c=fgetc(inf);

            switch(state)

            {

                case STA_START:

                                if( c==' ' || c=='\t');

                                else if(c=='\n')

                                {

                                    line_no++;

                                    fprintf(outf,"Line %d--------------------------------------\n",line_no);

                                }

                                else if( (c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

                                {

                                    state=STA_IDorKEYWORD;

                                    type=TYPE_IDENTIFIER;

                                    str[i]=c; i++;

                                }

                                else if(c>='0' && c<='9')

                                {

                                    state=STA_NUMBER;

                                    type=TYPE_NUMBER;

                                    str[i]=c; i++;

                                }

                                else if(c=='/')

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if(c=='/')   /*   //注釋    */

                                    {

                                        flag_limit_one_line=1;

                                        state=STA_NOTE;

                                        type=TYPE_NOTE;

                                        str[i]=c; i++;

 

                                    }

                                    else if(c=='*')       /*   / * 注釋開始    */

                                    {

                                        state=STA_NOTE;

                                        type=TYPE_NOTE;

                                        str[i]=c; i++;

                                    }

                                    else

                                    {

                                        state=STA_DONE;

                                        type=TYPE_OPERATOR;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                        i=1;/* str[1]='\0'; */

                                    }

                                }

                                else if(c=='<' || c=='>')

                                {

                                    state=STA_DONE;

                                    type=TYPE_OPERATOR;

                                    str[0]=c;

                                    c=fgetc(inf);

                                    if(c=='=')

                                    {

                                        str[1]='=';i=2;/* str[2]='\0'    */

                                    }

                                    else/* 暫未考慮<<,>>位移運算符的識別 */

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                        i=1;/* str[1]='\0'; */

                                    }

                                }

                                else if(c=='!')

                                {

                                    state=STA_DONE;

                                    str[0]=c;

                                    c=fgetc(inf);

                                    if(c=='=')  /*    是!= */

                                    {

                                        type=TYPE_OPERATOR;

                                        str[1]='=';i=2;/* str[2]='\0'    */

                                    }

                                    else        /*    非!= */

                                    {

                                        type=TYPE_UNKNOWN;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                        i=1;/* str[1]='\0'; */

                                    }

                                }

                                else if(c=='\"' || c=='\'')

                                {

                                    state=STA_CONSTANT;

                                    type=TYPE_CONSTANT;

                                    str[0]=c; i=1;

                                }

                                else if(IsOperator(c))

                                {

                                    state=STA_DONE;

                                    type=TYPE_OPERATOR;

                                    str[0]=c; i=1;/* str[1]='\0'; */

                                }

                                else if(IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    type=TYPE_SEPARATOR;

                                    str[0]=c; i=1;/* str[1]='\0'; */

                                }

                                else if(c==EOF)

                                {

                                    state=STA_DONE;

                                    type=TYPE_ENDFILE;

                                }

                                else

                                {

                                    state=STA_DONE;

                                    type=TYPE_UNKNOWN;

                                }

                                break;/* case STA */

                case STA_IDorKEYWORD:

                                if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') || c=='_' )

                                {

                                    str[i]=c; i++;

                                }

                                else if(c=='.')

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if((c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

                                    {

                                        str[i]=c; i++;

                                        /* type=TYPE_IDENTIFIER;     */

                                    }

                                    else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                        state=STA_DONE;

                                        type=TYPE_ERROR;         /*  如 date. 或 date.1 標記為ERROR類型 */

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                        type=TYPE_ERROR;

                                    }

                                }

                                else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                }

                                else

                                {

                                    state=STA_DONE;

                                    type=TYPE_ERROR;

                                }

                                break;/* case STA */

                case STA_NUMBER:

                                if(c>='0' && c<='9')

                                {

                                    str[i]=c; i++;

                                }

                                else if(c=='.')   /* 小數識別  */

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if(flag_had_got_dot==0)

                                    {

                                    if(c>='0' && c<='9')

                                    {

                                        str[i]=c; i++;

                                        /* type=TYPE_NUMBER; */

                                        flag_had_got_dot=1;

                                    }

                                    else /* if(c==' ' || c=='\t' || c=='\n' || c>='0' && c<='9' || ...) */

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                        state=STA_DONE;

                                        type=TYPE_ERROR;         /*  如 date. 或 date.1 標記為ERROR類型 */

                                    }

                                    }

                                    else

                                    {

                                        type=TYPE_ERROR;

                                        str[i]=c; i++;

                                    }

                                }

                                else if(c==' ' || c=='\t' || c=='\n' || IsOperator(c) || IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                }

                                else

                                {

                                    str[i]=c; i++;

                                    type=TYPE_ERROR;

                                }

                                break;/* case STA */

                case STA_NOTE:

                                if(flag_limit_one_line==1)   /*      是/ * 注釋, 限定一行 */

                                {

                                    if(c=='\n')

                                    {

                                        state=STA_DONE;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節 */

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                }

                                else /*      是  / *   注釋 */

                                {

                                    if(feof(inf))

                                    {

                                        state=STA_DONE;

                                        type=TYPE_ERROR;

                                    }

                                    else if(c=='\n')

                                    {

                                        line_no++;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='*')

                                    {

                                        str[i]=c; i++;

                                        c=fgetc(inf);

                                        if(c=='/')

                                        {

                                            state=STA_DONE;

                                            str[i]=c; i++;

                                        }

                                        else

                                        {

                                            if(feof(inf))

                                            {

                                                state=STA_DONE;

                                                type=TYPE_ERROR;

                                            }

                                            fseek(inf, -1, SEEK_CUR);/* 文件流指針前移1個字節  */

                                        }

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                }

                                break;/* case STA */

                case STA_CONSTANT:

                                    if(feof(inf))

                                    {

                                        state=STA_DONE;

                                        type=TYPE_ERROR;

                                    }

                                    else if(c=='\n')

                                    {

                                        line_no++;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='\"' || c=='\'')

                                    {

                                        state=STA_DONE;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='\\')

                                    {

                                        str[i]=c; i++;

                                        c=fgetc(inf);

                                        str[i]=c; i++;

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                    break;/* case STA */

                case STA_DONE:  break;/* case STA */

                default:        break;/* case STA */

            }

        }/* state=STA_DONE */

        str[i]='\0';

        OutputOneDone(outf,type,str);/* DONE一次輸出一次 */

   }/* feof(inf) */

}

 

/* 主函數 */

main()

{

    FILE *input,*output;

    if((input=fopen("input.c","r"))==NULL)

    {

        printf("Cannot find the file!\nStrike any key to exit!\n");

        system("pause");

        exit(1);

    }

    else

    {

        output=fopen("output.c","w");

        LexAnalyse(input,output);

        fprintf(output,"----------------------------------------------END OF FILE!\n");

        fclose(input);

        fclose(output);

        printf("Lexical Analyzer has finished the analyzation!\nFor more information please see the file output.txt.\n");

        system("pause");

    }

}

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM