PASCAL語言子集的詞法、語法分析器之實現


針對簡單的文法(PASCAL語言子集),製作相應的詞法分析器和遞歸下降的語法分析器。

       文法要求如下:

1、  關鍵字、標識符、數字等(另以 '#' 作為輸入結束符,種別編號為 0):

1.begin         2.if             3.then         4.while       5.do            6.end          10.標識符           11.數字

13.+             14.-            15.*            16./             17.:             18.:=          20.<                     21.<>

22.<=          23.>           24.>=         25.=           26.;             27.(            28.)

2、  文法規則:

程序 → begin 語句串 end

語句串 → 語句 { ; 語句 }

語句 → 賦值語句 | 條件語句 | 循環語句

賦值語句 → 變量 := 表達式

條件語句 → if 條件 then ( 語句 | 程序 )

循環語句 → while 條件 do ( 語句 | 程序 )

表達式 → 項 { + 項 | - 項 }

條件 → 表達式 關係符 表達式

關係符 → < | <> | <= | > | >= | =

項 → 因子 { * 因子 | / 因子 }

因子 → 變量 | 數字 | ( 表達式 )

變量 → 標識符


一、  詞法分析器

詞法分析器的任務是清除源文件中多餘的空格、換行、制表符等,識別文法符號。按順序輸出識別出的單詞符號及其種別編號(每行格式為「單詞,編號」),供語法分析器調用。

 

代碼如下:

 

#include<stdio.h>

#include<string.h>

#include<stdlib.h>

#define BOOL int

#define TRUE 1

#define FALSE 0

#define MAXSIZE 50

 

 

typedef char datatype;

 

/*
 * Circular character buffer used as the scanner's input queue.
 * `front` is the index of the slot just before the oldest element and
 * `rear` is the index of the newest element; front == rear means empty.
 */
typedef struct
{
  datatype data[MAXSIZE*2];   /* element storage */
  int front;
  int rear;
}Queue;

/*
 * Initialise the queue to the empty state by pointing both indices at the
 * last slot of the storage array.
 */
void setnull(Queue *q)
{
  const int last = MAXSIZE*2 - 1;

  q->rear = last;
  q->front = last;
}

/* Return TRUE when the queue holds no elements (front and rear coincide). */
BOOL empty(Queue *q)
{
  return (q->front != q->rear) ? FALSE : TRUE;
}

/*
 * Return TRUE when the queue cannot accept another element, i.e. when
 * advancing rear by one slot (with wrap-around) would land on front.
 */
BOOL full(Queue *q)
{
  int next = (q->rear + 1) % (MAXSIZE*2);

  return (next == q->front) ? TRUE : FALSE;
}

/*
 * Return the number of elements currently stored in the queue.
 * The raw index difference is negative when rear has wrapped around
 * behind front, in which case the buffer length is added back.
 */
int quantity(Queue *q)
{
  int count = q->rear - q->front;

  return (count < 0) ? count + MAXSIZE*2 : count;
}

/*
 * Peek at the oldest element without removing it.
 * Returns 0 when the queue is empty.
 */
datatype front(Queue *q)
{
  if(!empty(q))
       return q->data[(q->front + 1) % (MAXSIZE*2)];
  return 0;
}

/*
 * Append x to the tail of the queue.
 * Returns TRUE on success, or FALSE (leaving the queue untouched) when the
 * queue is already full.
 */
BOOL enqueue(Queue *q,datatype x)
{
  if(full(q))
       return FALSE;
  /* Advance rear with wrap-around.  Written as a plain assignment: the
     former `q->rear = ++q->rear % N` modified q->rear twice without an
     intervening sequence point, which is undefined behaviour. */
  q->rear = (q->rear + 1) % (MAXSIZE*2);
  q->data[q->rear] = x;
  return TRUE;
}

/*
 * Remove and return the oldest element of the queue.
 * Returns 0 when the queue is empty.
 */
datatype dequeue(Queue *q)
{
  if(empty(q))
       return 0;
  /* Advance front with wrap-around.  Written as a plain assignment: the
     former `q->front = ++q->front % N` modified q->front twice without an
     intervening sequence point, which is undefined behaviour. */
  q->front = (q->front + 1) % (MAXSIZE*2);
  return q->data[q->front];
}

 

/* Text of the most recently scanned token (filled in by saner()). */
char token[MAXSIZE];

/* Reserved words; saner() assigns category code index+1 to a match. */
char* rwtab[6]={"begin","if","then","while","do","end"};

/* Category code of the most recently scanned token (set by saner()). */
int syn;

/* Buffer queue of source characters waiting to be scanned. */
Queue prog;

 

/* Return TRUE when ch is an ASCII letter ('a'..'z' or 'A'..'Z'). */
BOOL letter(char ch)
{
  BOOL lower = (ch >= 'a' && ch <= 'z');
  BOOL upper = (ch >= 'A' && ch <= 'Z');

  return (lower || upper) ? TRUE : FALSE;
}

/* Return TRUE when ch is a decimal digit ('0'..'9'). */
BOOL digit(char ch)
{
  if(ch < '0')
       return FALSE;
  if(ch > '9')
       return FALSE;
  return TRUE;
}

void saner()                                /*掃描器*/

{

  int i;

  char ch;

  for(i=0;i<50;i++)

       token[i]=0;

  i=0;

  do                                        /*去除多余空格、換行及制表符*/

  {

       ch=dequeue(&prog);

  }while(ch==' ' || ch=='\n' || ch=='\t');

  if(letter(ch))                            /*識別標識符(編號10)*/

  {

       while(1)

       {

           token[i++]=ch;

           ch=front(&prog);

           if(letter(ch) || digit(ch))

                dequeue(&prog);

           else

                break;

       }

       token[i]='\0';

       syn=10;

       for(i=0;i<6;i++)

           if(!strcmp(token,rwtab[i]))

                syn=i+1;                    /*識別關鍵字(編號1到6)*/

  }

  else if(digit(ch))                   /*識別無符號整數(編號11)*/

  {

       while(1)

       {

           token[i++]=ch;

           ch=front(&prog);

           if(digit(ch))

                dequeue(&prog);

           else

                break;

       }

       token[i]='\0';

       syn=11;

  }

  else

       switch(ch)

       {

       case '#':                        /*識別結束符‘#’(編號0)*/

           syn=0;

           token[i++]='#';

           token[i]='\0';

           break;

       case '+':                        /*識別‘+’(編號13)*/

           syn=13;

           token[i++]='+';

           token[i]='\0';

           break;

       case '-':                        /*識別‘-’(編號14)*/

           syn=14;

           token[i++]='-';

           token[i]='\0';

           break;

       case '*':                        /*識別‘*’(編號15)*/

           syn=15;

           token[i++]='*';

           token[i]='\0';

           break;

       case '/':                        /*識別‘/’(編號16)*/

           syn=16;

           token[i++]='/';

           token[i]='\0';

           break;

       case ':':

           token[i++]=':';

           ch=front(&prog);

           switch(ch)

           {

           case '=':                   /*識別‘:=’(編號18)*/

                syn=18;

                token[i++]='=';

                token[i]='\0';

                dequeue(&prog);

                break;

           default:                    /*識別‘:’(編號17)*/

                syn=17;

                token[i]='\0';

                break;

           }

           break;

       case '<':

           token[i++]='<';

           ch=front(&prog);

           switch(ch)

           {

           case '>':                   /*識別‘<>’(編號21)*/

                syn=21;

                token[i++]='>';

                token[i]='\0';

                dequeue(&prog);

                break;

           case '=':                   /*識別‘<=’(編號22)*/

                syn=22;

                token[i++]='=';

                token[i]='\0';

                dequeue(&prog);

                break;

           default:                    /*識別‘<’(編號20)*/

                syn=20;

                token[i]='\0';

                break;

           }

           break;

       case '>':

           token[i++]='>';

           ch=front(&prog);

           switch(ch)

           {

           case '=':                   /*識別‘>=’(編號24)*/

                syn=24;

                token[i++]='=';

                token[i]='\0';

                dequeue(&prog);

                break;

           default:                    /*識別‘>’(編號23)*/

                syn=23;

                token[i]='\0';

                break;

           }

           break;

       case '=':                        /*識別‘=’(編號25)*/

           syn=25;

           token[i++]='=';

           token[i]='\0';

           break;

       case ';':                        /*識別‘;’(編號26)*/

           syn=26;

           token[i++]=';';

           token[i]='\0';

           break;

       case '(':                        /*識別‘(’(編號27)*/

           syn=27;

           token[i++]='(';

           token[i]='\0';

           break;

       case ')':                        /*識別‘)’(編號28)*/

           syn=28;

           token[i++]=')';

           token[i]='\0';

           break;

       default:                         /*出錯!*/

           syn=-1;

           break;

       }

}

 

main(int argc,char* argv[])

{

  FILE *in,*out;

  int i;

  char ch;

  const char ofname[]="scaned.txt";

  setnull(&prog);                           /*緩沖隊列初始化*/

  switch(argc)

  {

  case 2:

       if(!(in=fopen(argv[1],"r")))

       {

           printf("The file is not exist!");

           exit(1);

       }

       out=fopen(ofname,"w");

       break;

  case 3:

       if(!(in=fopen(argv[1],"r")))

       {

           printf("The file is not exist!");

           exit(1);

       }

       out=fopen(argv[2],"w");

       break;

  }

  do

  {

       switch(argc)

       {

       case 1:

           do

           {

                ch=getchar();

                enqueue(&prog,ch);

           }while(ch!='#' && !full(&prog));

           if(!(out=fopen(ofname,"a")))

                out=fopen(ofname,"w");

           break;

       case 2:

           do

           {

                ch=fgetc(in);

                enqueue(&prog,ch);

           }while(ch!='#' && !full(&prog));         

           if(ch=='#')

                fclose(in);

           break;

       case 3:

           do

           {

                ch=fgetc(in);

                enqueue(&prog,ch);

           }while(ch!='#' && !full(&prog));         

           if(ch=='#')

                fclose(in);

           break;

       default:

           printf("Input error!!");

           break;

       }

       do

       {

           saner();

           switch(syn)

           {

           case 0:

                fputc('#',out);

                fputc(',',out);

                fputc('0',out);

                fputc('\n',out);

                break;

           case -1:

                fprintf(out,"Error!\n");

                break;

           default:

                i=0;

                do

                {

                     fputc(token[i++],out);

                }while(token[i]!='\0');

                fputc(',',out);

                i=syn/10;

                if(i!=0)

                     fputc(i+48,out);

                fputc(syn%10+48,out);

                fputc('\n',out);

                break;

           }

       }while(syn!=0 && (quantity(&prog) > MAXSIZE || ch=='#'));

  }while(ch!='#');

  fclose(out);

}

 

 
二、  語法分析器

語法分析器的任務是根據詞法分析的結果判斷是否符合文法規則,並以一定形式輸出語法樹。(這裡按逆波蘭式輸出。其中符號“!”代表條件語句運算符;符號“@”代表循環語句運算符)

代碼如下:

 

#include<stdio.h>

#include<stdlib.h>

#include<conio.h>

#define BOOL int

#define TRUE 1

#define FALSE 0

#define MAXSIZE 50

 

/* One token record as filled in by scan() from the token input file. */
typedef struct

{

    int no;                 /* category code parsed from the digits after the comma */

    char str[MAXSIZE];      /* token text that precedes the comma */

}Element;

 

/* Current lookahead token; overwritten by every call to scan(). */
Element ch;

/* in: token stream read by scan(); out: stream the postfix output is written to. */
FILE *in,*out;

 

void scan();      /* scan: read the next token */

void error(int error); /* report an error */

void P();         /* program */         /* P → begin S end */

void S();         /* statement list */       /* S → SS { ; SS } */

void SS();        /* statement */         /* SS → S1 | S2 | S3 */

void S1();        /* assignment statement */     /* S1 → V := E */

void S2();        /* conditional statement */     /* S2 → if SS2 then ( SS | P ) */

void S3();        /* loop statement */     /* S3 → while SS2 do ( SS | P ) */

void E();         /* expression */       /* E → T { + T | - T } */

void SS2();       /* condition */         /* SS2 → E R E */

void R();         /* relational operator */       /* R → < | <> | <= | > | >= | = */

void T();         /* term */           /* T → F { * F | / F } */

void F();         /* factor */         /* F → V | N | ( E ) */

void V();         /* variable */         /* V → W */

void W();         /* identifier */

void N();         /* number */

 

void scan()

{

    char buffer;

    int i=0;

    int temp=0;

    do

    {

         buffer=fgetc(in);

         ch.str[i++]=buffer;

    }while(buffer!=',');

    while(TRUE)

    {

         buffer=fgetc(in);

         if(buffer!='\n')

         {

             temp=10*temp+buffer-48;

         }

         else

             break;

    }

    ch.no = temp;

    ch.str[--i]='\0';

}

 

/*
 * Print the diagnostic that corresponds to error code n and terminate the
 * program with exit status 1.  Codes without a dedicated message print
 * "Compile failed!".  getch() is called before exiting, presumably to keep
 * the console window open until a key is pressed.
 */
void error(int n)
{
    static const struct
    {
         int code;
         const char *text;
    } table[] =
    {
         { 1,  "標識符begin錯誤!" },
         { 2,  "標識符if錯誤!" },
         { 3,  "標識符then錯誤!" },
         { 4,  "標識符while錯誤!" },
         { 5,  "標識符do錯誤!" },
         { 6,  "標識符end錯誤!" },
         { 7,  "不是有效的句子!" },
         { 8,  "判斷語句出錯!" },
         { 9,  "循環語句出錯!" },
         { 10, "不是正確的標識符!" },
         { 11, "條件表達式錯誤!" },
         { 12, "算術表達式錯誤!" },
         { 18, "賦值語句錯誤!" },
         { 28, "缺少')'!" }
    };
    const char *msg="Compile failed!";
    unsigned k;

    for(k=0;k<sizeof table/sizeof table[0];k++)
    {
         if(table[k].code==n)
         {
             msg=table[k].text;
             break;
         }
    }
    printf("%s",msg);
    getch();
    exit(1);
}

 

/*
 * Program:  P → begin S end
 * Expects the lookahead to be `begin` (code 1), parses the statement list
 * and then requires `end` (code 6); reports error 1 or 6 otherwise.
 */
void P()
{
    if(ch.no!=1)
    {
         error(1);
         return;
    }
    scan();
    S();
    if(ch.no!=6)
    {
         error(6);
         return;
    }
    scan();
}

 

/*
 * Statement list:  S → SS { ; SS }
 * Parses one statement, then another after every ';' (code 26).
 */
void S()
{
    SS();
    for(;;)
    {
         if(ch.no!=26)
             break;
         scan();
         SS();
    }
}

 

/*
 * Statement:  SS → S1 | S2 | S3
 * Dispatches on the lookahead: identifier (10) → assignment, `if` (2) →
 * conditional, `while` (4) → loop; anything else is error 7.
 */
void SS()
{
    if(ch.no==10)
         S1();
    else if(ch.no==2)
         S2();
    else if(ch.no==4)
         S3();
    else
         error(7);
}

 

/*
 * Assignment:  S1 → V := E
 * Writes the variable and the expression first and the ":= " operator
 * last (postfix order).  A missing ':=' (code 18) is error 18.
 */
void S1()
{
    V();
    if(ch.no!=18)
    {
         error(18);
         return;
    }
    scan();
    E();
    fputs(":= ",out);
}

 

void S2()

{

    if(ch.no==2)

    {

         scan();

         SS2();

         if(ch.no==3)

         {

             scan();

             switch(ch.no)

             {

             case 10:

             case 2:

             case 4:

                  SS();

                  break;

             case 1:

                  P();

                  break;

             default:

                  error(8);

                  break;

             }

             fputc('!',out);

             fputc(' ',out);

         }

         else

             error(3);

    }

    else

         error(2);

}

 

void S3()

{

    if(ch.no==4)

    {

         scan();

         SS2();

         if(ch.no==5)

         {

             scan();

             switch(ch.no)

             {

             case 10:

             case 2:

             case 4:

                  SS();

                  break;

             case 1:

                  P();

                  break;

             default:

                  error(9);

                  break;

             }

             fputc('@',out);

             fputc(' ',out);

         }

         else

             error(5);

    }

    else

         error(4);

}

 

/* Variable:  V → W.  Simply delegates to W(). */
void V()

{

    W();

}

 

/*
 * Expression:  E → T { + T | - T }
 * Parses a term followed by any mix of '+' (code 13) and '-' (code 14)
 * terms, writing each operator after its right operand (postfix order).
 *
 * Both operators are handled in one loop.  The previous version ran a '+'
 * loop followed by a separate '-' loop, so a '+' after a '-' was never
 * consumed and an input such as "a - b + c" was rejected.
 */
void E()
{
    T();
    while(ch.no==13 || ch.no==14)
    {
         char op = (ch.no==13) ? '+' : '-';

         scan();
         T();
         fputc(op,out);
         fputc(' ',out);
    }
}

 

/*
 * Condition:  SS2 → E R E
 * Parses both operand expressions and then writes the relational operator
 * that was seen between them, followed by a space (postfix order).
 */
void SS2()
{
    /* Operator text for category codes 20..25, in code order. */
    static const char *const ops[] = { "< ", "<> ", "<= ", "> ", ">= ", "= " };
    int rel;

    E();
    rel=ch.no;                   /* remember the operator before R() consumes it */
    R();
    E();
    if(rel>=20 && rel<=25)
         fputs(ops[rel-20],out);
}

 

void R()

{

    switch(ch.no)

    {

    case 20:

         scan();

         break;

    case 21:

         scan();

         break;

    case 22:

         scan();

         break;

    case 23:

         scan();

         break;

    case 24:

         scan();

         break;

    case 25:

         scan();

         break;

    default:

         error(11);

         break;

    }

}

 

/*
 * Identifier: writes the identifier text followed by a space and advances
 * to the next token.  A lookahead that is not an identifier (code 10) is
 * error 10.
 */
void W()
{
    if(ch.no!=10)
    {
         error(10);
         return;
    }
    fputs(ch.str,out);
    fputc(' ',out);
    scan();
}

 

/*
 * Term:  T → F { * F | / F }
 * Parses a factor followed by any mix of '*' (code 15) and '/' (code 16)
 * factors, writing each operator after its right operand (postfix order).
 *
 * Both operators are handled in one loop.  The previous version ran a '*'
 * loop followed by a separate '/' loop, so a '*' after a '/' was never
 * consumed and an input such as "a / b * c" was rejected.
 */
void T()
{
    F();
    while(ch.no==15 || ch.no==16)
    {
         char op = (ch.no==15) ? '*' : '/';

         scan();
         F();
         fputc(op,out);
         fputc(' ',out);
    }
}

 

void F()

{

    switch(ch.no)

    {

    case 10:

         V();

         break;

    case 11:

         N();

         break;

    case 27:

         scan();

         E();

         if(ch.no==28)

             scan();

         else

             error(28);

         break;

    default:

         error(12);

         break;

    }

}

 

/*
 * Number: writes the number text followed by a space and advances to the
 * next token.  A lookahead that is not a number (code 11) is error 12.
 */
void N()
{
    if(ch.no!=11)
    {
         error(12);
         return;
    }
    fprintf(out,"%s ",ch.str);
    scan();
}

 

/*
 * Parser driver: reads tokens from "scaned.txt", parses them as a program
 * and writes the postfix form to "compiled.txt".  On success prints
 * "Success!" and appends '#' to the output; any failure is reported through
 * error(), which terminates the program.
 *
 * Declared as `int main(void)` (the standard signature) instead of
 * `void main()`, and the output file is now checked before it is used.
 */
int main(void)
{
    const char* input="scaned.txt";
    const char* output="compiled.txt";

    if(!(in=fopen(input,"r")))
         error(-1);
    if(!(out=fopen(output,"w")))
    {
         fclose(in);
         error(-1);
    }

    scan();                      /* prime the lookahead token */
    P();
    if(ch.no==0)                 /* the program must be followed by '#' */
    {
         printf("Success!");
         fputc('#',out);
    }
    else
         error(-1);

    fclose(in);
    fclose(out);
    getch();
    return 0;
}

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM