小女也愛葵花寶典---讀懂編譯原理之詞法分析(2)


      上一篇文章中的詞法分析沒有寫全,還不能識別關鍵字,本篇小女繼續添加識別關鍵字的功能。

void toker(char* cinput ,scrWord *wordTable)

{// cinput輸入的單詞, wordTable單詞表之后講解

    int Wordlen=strlen(cinput);//得到輸入串的長度

    scrWord *lpWordTable=wordTable;

    char* lexemBegin=cinput;//串的開始指針

    char* lexemEnd=cinput; //串的結束指針

    bool isNewWord=false;//是否是新詞單

    bool isBreak=true;//是否是空格

    int count=0;//記錄一共有幾個單

    while(*lexemEnd!=';')

    {

        if((*(lexemEnd-1))!=' ' &&  (*lexemEnd==' ' || *lexemEnd=='(' || *lexemEnd==')'))//這里之后會改進

            isNewWord=true;//

        else

            isNewWord=false;

        if((*lexemEnd==' ' || *lexemEnd=='(' || *lexemEnd==')') && isNewWord==true)

        {

            scrWord *wordTable2=new scrWord();//單詞表

            memset(wordTable2->data,0,100);//內存初始化

            cpystr(wordTable2->data,lexemBegin,lexemEnd-lexemBegin);//單詞表

            lpWordTable->next=wordTable2;//用鏈表生成的單詞表

            lpWordTable->len=lexemEnd-lexemBegin;// 單詞長度

            lpWordTable=lpWordTable->next;

            lexemBegin=lexemEnd;
    //新添加的代碼
             if(cmptostr("create",lexemBegin,lexemEnd-1-lexemBegin))
             {
 //識別后關鍵字create后添加到單詞表,這種識別關鍵字的方法是小女自己想出
//來的,這種寫法不太好,多個子關鍵字就要加多個if
             }
    //新添加的代碼
        }

        lexemEnd++;

        if(lexemEnd-lexemBegin>Wordlen)

        {

            return;

        }

    }

}

以上代碼添加了對關鍵字create的識別,但這種方法不好:每多一個關鍵字就要多加一個if。下面把代碼改進成龍書中所說的狀態圖識別法。

這張狀態圖包含了SQL語法中以T打頭的5個關鍵字:then、table、temp、to、transaction。

if(cmptostr("create",lexemBegin,lexemEnd-1-lexemBegin))
             {
 // After the keyword "create" is recognised it is added to the word table.
 // This way of recognising keywords is the author's own idea; it is not a
 // good approach, because every additional keyword needs another if.
             }

 

這段代碼替換成:

 // k receives the result of the keyword recogniser; it stays -1 when the
 // first letter has no case below.
 int k=-1;
            switch(tolower(*lexemBegin))
            {// Readers can add the keywords starting with C themselves.
                case 't':
// Recognise the keywords that start with T: returns 1 on a match, -1 otherwise.
                    k=GetKeyThen(lexemBegin,lexemEnd,360);
                    break;
            }
GetKeyThen函數實現如下:
/*
 * Recognise the SQL keywords that start with 't' -- then, table, temp, to,
 * transaction -- by walking the state diagram, ignoring letter case.
 * Read the code together with the state diagram in the article.
 *
 * lexemBegin  first character of the lexeme
 * lexemEnd    one past the last character of the lexeme; blanks directly
 *             before lexemEnd are ignored. lexemEnd itself is never read.
 * state       state to start from. 360 is the start state, i.e. the leading
 *             't' has been seen; the first character fed to the automaton is
 *             always lexemBegin[1].
 *
 * Returns 1 when the whole lexeme is one of the keywords, -1 otherwise.
 */
int GetKeyThen(char* lexemBegin,char* lexemEnd,int state)
{
    if(!lexemBegin || !lexemEnd)
        return -1;

    // Drop trailing blanks so that "table " still counts as "table".
    while(lexemEnd>lexemBegin && *(lexemEnd-1)==' ')
        lexemEnd--;
    if(lexemEnd<=lexemBegin)
        return -1;

    // The start state stands for "a 't' has been read": verify it here
    // instead of relying on the caller.
    if(state==360 && tolower((unsigned char)*lexemBegin)!='t')
        return -1;

    for(char* cursor=lexemBegin+1;cursor<lexemEnd;cursor++)
    {
        // Cast first: passing a negative char to tolower() is undefined.
        int ch=tolower((unsigned char)*cursor);
        int next=-1; // -1 = no edge for this character (dead state)

        switch(state)
        {
            case 360: // "t"
                if(ch=='h')      next=7;
                else if(ch=='a') next=3;
                else if(ch=='e') next=4;
                else if(ch=='o') next=5;
                else if(ch=='r') next=6;
                break;
            case 7:  if(ch=='e') next=12; break; // "th"
            case 12: if(ch=='n') next=13; break; // "the"
            case 3:  if(ch=='b') next=8;  break; // "ta"
            case 8:  if(ch=='l') next=10; break; // "tab"
            case 10: if(ch=='e') next=11; break; // "tabl"
            case 4:  if(ch=='m') next=9;  break; // "te"
            case 9:  if(ch=='p') next=14; break; // "tem"
            case 6:  if(ch=='a') next=15; break; // "tr"
            case 15: if(ch=='n') next=16; break; // "tra"
            case 16: if(ch=='s') next=17; break; // "tran"
            case 17: if(ch=='a') next=18; break; // "trans"
            case 18: if(ch=='c') next=19; break; // "transa"
            case 19: if(ch=='t') next=20; break; // "transac"
            case 20: if(ch=='i') next=21; break; // "transact"
            case 21: if(ch=='o') next=22; break; // "transacti"
            case 22: if(ch=='n') next=23; break; // "transactio"
            default:
                // Accepting states (5, 11, 13, 14, 23) have no outgoing
                // edge, so any further character means "not a keyword".
                // Unknown states are rejected the same way.
                break;
        }

        if(next==-1)
            return -1;
        state=next;
    }

    // The lexeme is a keyword only if it ends exactly in an accepting state.
    switch(state)
    {
        case 5:  // "to"
        case 11: // "table"
        case 13: // "then"
        case 14: // "temp"
        case 23: // "transaction"
            return 1;
        default:
            return -1; // not recognised
    }
}

以上代碼改進了詞法分析的功能。在下一篇中小女將講語法分析,並把詞法分析完善。

下面,給出完整的代碼.

#include "stdafx.h"
#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// Forward declaration: toker() calls GetKeyThen() before its definition
// appears further down in this listing.
int GetKeyThen(char* lexemBegin,char* lexemEnd,int state);
/*
 * One node of the singly linked word table filled in by toker().
 * The self-referential members use the struct tag, because the typedef name
 * is not declared yet inside the struct body when compiled as C.
 */
typedef struct scrWord
{
    char data[100];            // lexeme text
    int len;                   // lexeme length
    struct scrWord* next;      // next node in the word table
    struct scrWord* Property;  // attribute record of the lexeme
}scrWord;
/*
 * Placeholder helper: ignores str, writes "sdfsdf" to standard output and
 * returns a pointer to the string literal "sdf". The result points at a
 * literal and must not be modified by the caller.
 */
char* opSetStr(const char* str)
{
    (void)str; // the argument is not used
    fputs("sdfsdf",stdout);
    return (char*)"sdf";
}
/*
 * Copy exactly len bytes from scr to des, front to back.
 * No terminator is appended; a len of zero or less copies nothing.
 */
void cpystr(char* des ,char* scr,int len)
{
    int remaining=len;
    while(remaining>0)
    {
        *des++=*scr++;
        remaining--;
    }
}
/*
 * Case-insensitively compare up to len characters of scr and lexemBegin.
 *
 * scr         first string (e.g. a keyword)
 * lexemBegin  second string (e.g. the start of a lexeme)
 * len         maximum number of characters to compare
 *
 * Returns true when the compared characters are equal. The comparison also
 * stops, with a match, once both strings end at the same position, so
 * nothing behind a NUL terminator is ever read. A len of zero or less
 * compares nothing and yields true.
 */
bool cmptostr(char* scr ,char* lexemBegin ,int len)
{
    for(int i=0;i<len;i++)
    {
        // Go through unsigned char: tolower() on a negative char is undefined.
        unsigned char a=(unsigned char)scr[i];
        unsigned char b=(unsigned char)lexemBegin[i];
        if(tolower(a)!=tolower(b))
            return false;
        if(a=='\0') // equal and NUL: both strings ended here
            return true;
    }
    return true;
}
void toker(char* cinput ,scrWord *wordTable)
{
    int Wordlen=strlen(cinput);
    scrWord *lpWordTable=wordTable;
    char* lexemBegin=cinput;
    char* lexemEnd=cinput;
    char* forward=cinput;
    bool isNewWord=true;
    bool isBreak=true;
    int count=0;
    while(*lexemEnd!=';')
    {
        if((*(lexemEnd+1))!=' '  &&  (*lexemEnd==' ' || *lexemEnd=='(' || *lexemEnd==')'))
            isNewWord=true;
        else
            isNewWord=false;
        if((*lexemEnd==' ' || *lexemEnd=='(' || *lexemEnd==')') && isNewWord==true)
        {
            scrWord *wordTable2=new scrWord();
            memset(wordTable2->data,0,100);
            cpystr(wordTable2->data,lexemBegin,lexemEnd-lexemBegin);//單詞表
            lpWordTable->next=wordTable2;
            lpWordTable->len=lexemEnd-lexemBegin;
            lpWordTable=lpWordTable->next;
            int k=-1;
            switch(tolower(*lexemBegin))
            {
                case 't':
                    k=GetKeyThen(lexemBegin,lexemEnd,360);
                    break;
            }
            if(k==-1)
            {
                //詞素為關鍵字則屬性為空
                lpWordTable->Property=NULL;
            }
            lexemBegin=lexemEnd+1;
        }
        lexemEnd++;
        if(lexemEnd-lexemBegin>Wordlen)
        {
            return;
        }
    }
}
/*
 * Recognise the SQL keywords that start with 't' -- then, table, temp, to,
 * transaction -- by walking a hand-coded state diagram, ignoring letter case.
 *
 * lexemBegin  first character of the lexeme
 * lexemEnd    one past the last character of the lexeme; blanks directly
 *             before lexemEnd are ignored. lexemEnd itself is never read.
 * state       state to start from. 360 is the start state, i.e. the leading
 *             't' has been seen; the first character fed to the automaton is
 *             always lexemBegin[1].
 *
 * Returns 1 when the whole lexeme is one of the keywords, -1 otherwise.
 */
int GetKeyThen(char* lexemBegin,char* lexemEnd,int state)
{
    if(!lexemBegin || !lexemEnd)
        return -1;

    // Drop trailing blanks so that "table " still counts as "table".
    while(lexemEnd>lexemBegin && *(lexemEnd-1)==' ')
        lexemEnd--;
    if(lexemEnd<=lexemBegin)
        return -1;

    // The start state stands for "a 't' has been read": verify it here
    // instead of relying on the caller.
    if(state==360 && tolower((unsigned char)*lexemBegin)!='t')
        return -1;

    for(char* cursor=lexemBegin+1;cursor<lexemEnd;cursor++)
    {
        // Cast first: passing a negative char to tolower() is undefined.
        int ch=tolower((unsigned char)*cursor);
        int next=-1; // -1 = no edge for this character (dead state)

        switch(state)
        {
            case 360: // "t"
                if(ch=='h')      next=7;
                else if(ch=='a') next=3;
                else if(ch=='e') next=4;
                else if(ch=='o') next=5;
                else if(ch=='r') next=6;
                break;
            case 7:  if(ch=='e') next=12; break; // "th"
            case 12: if(ch=='n') next=13; break; // "the"
            case 3:  if(ch=='b') next=8;  break; // "ta"
            case 8:  if(ch=='l') next=10; break; // "tab"
            case 10: if(ch=='e') next=11; break; // "tabl"
            case 4:  if(ch=='m') next=9;  break; // "te"
            case 9:  if(ch=='p') next=14; break; // "tem"
            case 6:  if(ch=='a') next=15; break; // "tr"
            case 15: if(ch=='n') next=16; break; // "tra"
            case 16: if(ch=='s') next=17; break; // "tran"
            case 17: if(ch=='a') next=18; break; // "trans"
            case 18: if(ch=='c') next=19; break; // "transa"
            case 19: if(ch=='t') next=20; break; // "transac"
            case 20: if(ch=='i') next=21; break; // "transact"
            case 21: if(ch=='o') next=22; break; // "transacti"
            case 22: if(ch=='n') next=23; break; // "transactio"
            default:
                // Accepting states (5, 11, 13, 14, 23) have no outgoing
                // edge, so any further character means "not a keyword".
                // Unknown states are rejected the same way.
                break;
        }

        if(next==-1)
            return -1;
        state=next;
    }

    // The lexeme is a keyword only if it ends exactly in an accepting state.
    switch(state)
    {
        case 5:  // "to"
        case 11: // "table"
        case 13: // "then"
        case 14: // "temp"
        case 23: // "transaction"
            return 1;
        default:
            return -1; // not recognised
    }
}
int main(int argc, char* argv[])
{
    scrWord *wordTable=new scrWord();
    char * str="create table  then temp ffek transaction to  thne hts(fe int);";
    toker(str,wordTable);
    return 0;
}

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM