上一篇文章中的詞法分析沒有寫全,還不能識別關鍵字,小女在這一篇繼續添加識別關鍵字的功能。
void toker(char* cinput ,scrWord *wordTable) {// cinput輸入的單詞, wordTable單詞表之后講解 int Wordlen=strlen(cinput);//得到輸入串的長度 scrWord *lpWordTable=wordTable; char* lexemBegin=cinput;//串的開始指針 char* lexemEnd=cinput; //串的結束指針 bool isNewWord=false;//是否是新詞單 bool isBreak=true;//是否是空格 int count=0;//記錄一共有幾個單 while(*lexemEnd!=';') { if((*(lexemEnd-1))!=' ' && (*lexemEnd==' ' || *lexemEnd=='(' || *lexemEnd==')'))//這里之后會改進 isNewWord=true;// else isNewWord=false; if((*lexemEnd==' ' || *lexemEnd=='(' || *lexemEnd==')') && isNewWord==true) { scrWord *wordTable2=new scrWord();//單詞表 memset(wordTable2->data,0,100);//內存初始化 cpystr(wordTable2->data,lexemBegin,lexemEnd-lexemBegin);//單詞表 lpWordTable->next=wordTable2;//用鏈表生成的單詞表 lpWordTable->len=lexemEnd-lexemBegin;// 單詞長度 lpWordTable=lpWordTable->next; lexemBegin=lexemEnd; //新添加的代碼 if(cmptostr("create",lexemBegin,lexemEnd-1-lexemBegin)) { //識別后關鍵字create后添加到單詞表,這種識別關鍵字的方法是小女自己想出 //來的,這種寫法不太好,多個子關鍵字就要加多個if } //新添加的代碼 } lexemEnd++; if(lexemEnd-lexemBegin>Wordlen) { return; } } }
以上代碼添加了關鍵字create的識別,但這種方法不好;下面把代碼改進成龍書中所說的方法:用狀態圖識別關鍵字。
這張狀態圖包含了SQL語法中以T打頭的5個關鍵字:then、table、temp、to、transaction。
// NOTE(review): the length passed here is lexemEnd-1-lexemBegin, one less than
// the distance between the two pointers - confirm against the calling code.
if(cmptostr("create",lexemBegin,lexemEnd-1-lexemBegin))
{
    // Keyword "create" recognised; this is where it would be added to the
    // word table. This is the author's own approach and it does not scale:
    // every additional keyword needs another if.
}
這段代碼替換成:
// Dispatch on the first letter of the lexeme; k stays -1 unless a keyword
// recogniser reports a match.
int k = -1;
// Cast first: passing a negative char value to tolower is undefined.
switch (tolower((unsigned char)*lexemBegin))
{
case 't':
    // Keywords starting with T: GetKeyThen returns 1 on a match, -1 otherwise.
    // 360 is the start state of the recogniser.
    k = GetKeyThen(lexemBegin, lexemEnd, 360);
    break;
default:
    // Other first letters (for example the keywords starting with C) are
    // left for the reader to implement.
    break;
}
// Implementation of GetKeyThen: recognises the keywords then, table, temp,
// to and transaction with a state machine (read it together with the state
// diagram in the article).
//
// lexemBegin - first character of the lexeme (the leading 't'; the caller
//              has already checked it).
// lexemEnd   - one past the last character of the lexeme.
// state      - state to start in; callers pass 360, the start state. The
//              state numbers are arbitrary identifiers.
//
// Returns 1 when the whole lexeme, compared case-insensitively, is one of
// the five keywords, and -1 otherwise. A character with no transition
// rejects immediately, and a keyword followed by further characters
// ("thenx", "total") is not accepted. Nothing at or beyond lexemEnd is read.
int GetKeyThen(char* lexemBegin,char* lexemEnd,int state)
{
    // Transitions of the diagram: in state `from`, on character `on`, go to `to`.
    static const struct { int from; char on; int to; } edges[] = {
        {360, 'h', 7}, {360, 'a', 3}, {360, 'e', 4}, {360, 'o', 5}, {360, 'r', 6},
        {7, 'e', 12}, {12, 'n', 13},                                   /* then  */
        {3, 'b', 8}, {8, 'l', 10}, {10, 'e', 11},                      /* table */
        {4, 'm', 9}, {9, 'p', 14},                                     /* temp  */
        {6, 'a', 15}, {15, 'n', 16}, {16, 's', 17}, {17, 'a', 18},
        {18, 'c', 19}, {19, 't', 20}, {20, 'i', 21}, {21, 'o', 22},
        {22, 'n', 23}                                                  /* transaction */
    };
    // then, table, temp, to, transaction
    static const int accepting[] = { 13, 11, 14, 5, 23 };
    const int edgeCount = (int)(sizeof edges / sizeof edges[0]);
    const int acceptingCount = (int)(sizeof accepting / sizeof accepting[0]);

    if (!lexemBegin || !lexemEnd || lexemEnd <= lexemBegin)
        return -1;

    // The first character was consumed by the caller, so start at the second.
    for (char *cursor = lexemBegin + 1; cursor < lexemEnd; ++cursor) {
        // Cast first: passing a negative char value to tolower is undefined.
        const int ch = tolower((unsigned char)*cursor);
        int next = -1;
        for (int e = 0; e < edgeCount; ++e) {
            if (edges[e].from == state && edges[e].on == ch) {
                next = edges[e].to;
                break;
            }
        }
        if (next == -1)
            return -1;      // no transition for this character: not a keyword
        state = next;
    }

    // The lexeme is exhausted; it is a keyword only if we stopped in an
    // accepting state.
    for (int a = 0; a < acceptingCount; ++a) {
        if (accepting[a] == state)
            return 1;
    }
    return -1;              // not recognised
}
以上代碼改進了詞法分析的功能。在下一篇中,小女將講語法分析,並把詞法分析完善。
下面,給出完整的代碼.
#include "stdafx.h"
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int GetKeyThen(char* lexemBegin,char* lexemEnd,int state);

// One node of the word table, a singly linked list of lexemes.
typedef struct scrWord
{
    char data[100];     // lexeme text, NUL-terminated
    int len;            // number of characters stored in data
    scrWord* next;      // next word, or NULL at the end of the list
    scrWord* Property;  // attribute of the word
} scrWord;

// Placeholder: prints a fixed marker and returns a fixed string.
// The argument is not used.
char* opSetStr(const char* str)
{
    // Returned from a writable static array because the return type is a
    // non-const char*, which must not point at a string literal.
    static char result[] = "sdf";
    (void)str;
    printf("sdfsdf");
    return result;
}

// Copies exactly len characters from scr to des. No terminator is appended;
// the caller provides a destination of at least len characters.
void cpystr(char* des ,const char* scr,int len)
{
    for (int i = 0; i < len; i++)
        des[i] = scr[i];
}

// Case-insensitive comparison of the first len characters of scr and
// lexemBegin. Returns true when they all match (also when len <= 0).
bool cmptostr(const char* scr ,const char* lexemBegin ,int len)
{
    for (int i = 0; i < len; i++) {
        // Cast first: passing a negative char value to tolower is undefined.
        if (tolower((unsigned char)scr[i]) != tolower((unsigned char)lexemBegin[i]))
            return false;
    }
    return true;
}

// True for the characters that separate words: ' ', '(' and ')'.
static bool isWordSeparator(char c)
{
    return c == ' ' || c == '(' || c == ')';
}

// Lexer: splits cinput into words and appends them to the word table.
//
// cinput    - NUL-terminated statement text. Scanning stops at the first ';'
//             or, if there is none, at the terminating NUL.
// wordTable - head node of the word list. Each word found is appended as a
//             new node after it; the head node itself is not filled in.
//
// Words are separated by ' ', '(' and ')'; the separators are not stored and
// runs of separators never produce empty words. Each new node holds the word
// text in data and its length in len.
void toker(char* cinput ,scrWord *wordTable)
{
    if (!cinput || !wordTable)
        return;

    scrWord *tail = wordTable;      // current end of the list
    char *lexemBegin = cinput;      // first character of the current word

    for (;;) {
        while (isWordSeparator(*lexemBegin))
            lexemBegin++;

        // Advance to one past the last character of the word.
        char *lexemEnd = lexemBegin;
        while (*lexemEnd != '\0' && *lexemEnd != ';' && !isWordSeparator(*lexemEnd))
            lexemEnd++;

        // Nothing collected: we are sitting on ';' or on the end of the text.
        if (lexemEnd == lexemBegin)
            return;

        scrWord *node = new scrWord();
        memset(node->data, 0, sizeof node->data);

        // Leave room for the terminating NUL so an over-long word cannot
        // overflow the fixed-size buffer.
        int storedLen = (int)(lexemEnd - lexemBegin);
        if (storedLen > (int)sizeof node->data - 1)
            storedLen = (int)sizeof node->data - 1;
        cpystr(node->data, lexemBegin, storedLen);
        node->len = storedLen;      // length belongs to the node holding the word
        node->next = NULL;

        tail->next = node;          // append to the linked list
        tail = node;

        // Keyword recognition, dispatched on the first letter of the word.
        int k = -1;
        switch (tolower((unsigned char)*lexemBegin)) {
        case 't':
            k = GetKeyThen(lexemBegin, lexemEnd, 360);
            break;
        default:
            break;
        }
        if (k == -1) {
            // NOTE(review): the original comment said "a keyword gets a null
            // property", but this branch runs when the word was NOT recognised
            // as a keyword. Behaviour kept as written - confirm the intent.
            node->Property = NULL;
        }

        lexemBegin = lexemEnd;
    }
}

// Recognises the keywords then, table, temp, to and transaction with a
// state machine.
//
// lexemBegin - first character of the lexeme (the leading 't'; the caller
//              has already checked it).
// lexemEnd   - one past the last character of the lexeme.
// state      - state to start in; callers pass 360, the start state. The
//              state numbers are arbitrary identifiers.
//
// Returns 1 when the whole lexeme, compared case-insensitively, is one of
// the five keywords, and -1 otherwise. A character with no transition
// rejects immediately, and a keyword followed by further characters
// ("thenx", "total") is not accepted. Nothing at or beyond lexemEnd is read.
int GetKeyThen(char* lexemBegin,char* lexemEnd,int state)
{
    // Transitions: in state `from`, on character `on`, go to `to`.
    static const struct { int from; char on; int to; } edges[] = {
        {360, 'h', 7}, {360, 'a', 3}, {360, 'e', 4}, {360, 'o', 5}, {360, 'r', 6},
        {7, 'e', 12}, {12, 'n', 13},                                   /* then  */
        {3, 'b', 8}, {8, 'l', 10}, {10, 'e', 11},                      /* table */
        {4, 'm', 9}, {9, 'p', 14},                                     /* temp  */
        {6, 'a', 15}, {15, 'n', 16}, {16, 's', 17}, {17, 'a', 18},
        {18, 'c', 19}, {19, 't', 20}, {20, 'i', 21}, {21, 'o', 22},
        {22, 'n', 23}                                                  /* transaction */
    };
    // then, table, temp, to, transaction
    static const int accepting[] = { 13, 11, 14, 5, 23 };
    const int edgeCount = (int)(sizeof edges / sizeof edges[0]);
    const int acceptingCount = (int)(sizeof accepting / sizeof accepting[0]);

    if (!lexemBegin || !lexemEnd || lexemEnd <= lexemBegin)
        return -1;

    // The first character was consumed by the caller, so start at the second.
    for (char *cursor = lexemBegin + 1; cursor < lexemEnd; ++cursor) {
        const int ch = tolower((unsigned char)*cursor);
        int next = -1;
        for (int e = 0; e < edgeCount; ++e) {
            if (edges[e].from == state && edges[e].on == ch) {
                next = edges[e].to;
                break;
            }
        }
        if (next == -1)
            return -1;      // no transition for this character: not a keyword
        state = next;
    }

    // The lexeme is exhausted; it is a keyword only if we stopped in an
    // accepting state.
    for (int a = 0; a < acceptingCount; ++a) {
        if (accepting[a] == state)
            return 1;
    }
    return -1;
}

// Runs the lexer over a sample statement, then releases the word list.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    // Writable array: toker takes a non-const char*.
    char str[] = "create table then temp ffek transaction to thne hts(fe int);";
    scrWord *wordTable = new scrWord();

    toker(str, wordTable);

    // Every node, including the head, was allocated with new.
    while (wordTable) {
        scrWord *next = wordTable->next;
        delete wordTable;
        wordTable = next;
    }
    return 0;
}