初步實現了一個接口:
1 #include<stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #include "lyPublic/lyCodeConvert.h" 5 #define X_LONGSEN 500 6 #define Y_LONGWORD 100 7 struct node 8 { 9 char MWord[Y_LONGWORD]; //對應的最高權權值 10 int order; //權值、並作為標記是否有詞 11 struct node *next[16]; 12 }; 13 14 typedef struct node node; 15 char z_Str[Y_LONGWORD]; 16 17 void insertTree(char *str, node *T, char *MaxWord, int num_max); 18 void findStr(char *str, node *T); 19 int findNum(char *str, node *T); 20 int SentenceTransform(char *FromWord,char *Tostr); 21 22 /* 23 函數功能: 24 將一句話里面的部分詞 轉化為 權值最高的詞 25 變量說明: 26 FromWord 原句子 27 Tostr 轉化后的句子 28 */ 29 30 int SentenceTransform(char *FromWord,char *Tostr) 31 { 32 //FILE *fp; 33 FILE *fq; 34 char GetSentence[X_LONGSEN] = ""; // 讀取權值文檔的句子 35 int lenGetSen, leWord, leSen; 36 char GetWord[Y_LONGWORD] = "" ; 37 char ToWord[Y_LONGWORD] = ""; 38 char strhan[Y_LONGWORD] = ""; 39 char MaxWord[Y_LONGWORD] = ""; //每一句的權值最高詞 40 char hanMax[Y_LONGWORD] = "" ; 41 node *T; 42 int i, j, k, len, s, num, max_num, f; 43 int from, to, at; 44 int num_max; 45 int lenTostr = 0 , lenZ_Str; 46 47 T = (node *)malloc(sizeof(node)); 48 //初始化節點 49 memset (T->MWord, 0 ,sizeof(T->MWord)); 50 T->order = -1; 51 for(i = 0; i < 16; i++) 52 T->next[i] = NULL; 53 54 55 /* 56 打開權值文檔,文檔格式: 57 平凡28&平淡--62 平庸--5 平凡--82 平常--38] 58 平常83&平常--38 尋常--31] 59 貧困24&困頓--0 貧困--42 窘迫--4] 60 貧困24&貧困--42 貧寒--0 清貧--31 貧窮--7 窮苦--1] 61 說明: 62 第一個是一句話權值最高的詞,緊跟着的就是權值 63 & 是分隔符 64 后面的是 近義詞 的詞和氣權值大小 65 */ 66 67 68 69 // 讀取權值文檔,建立字典樹 70 fq = fopen ("1.txt","r++"); 71 // fp = fopen ("jieguo.txt","w+r"); 72 while (fgets (GetSentence, 500, fq) != NULL) //讀取權值文檔,建立各個詞對應的最高權值 73 { 74 lenGetSen = strlen(GetSentence); 75 leSen = 0; 76 memset (MaxWord, 0, sizeof(MaxWord)); 77 leWord = 0; 78 leWord = 0; 79 while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9')) 80 MaxWord[leWord++] = GetSentence[leSen++]; 81 //取最高權詞的權值 82 num_max = 0; 83 while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9') 84 num_max = num_max*10 + GetSentence[leSen++] - '0'; 85 leSen++; 86 while (GetSentence[leSen] != ']' && leSen < lenGetSen) 87 { 88 memset (GetWord, 0, sizeof(GetWord)); 89 memset (ToWord, 0, sizeof(ToWord)); 90 leWord = 0; 91 while (GetSentence[leSen] != '-') 92 { 93 GetWord[leWord++] = GetSentence[leSen++]; 94 } 95 HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord)); 96 insertTree (ToWord, T, MaxWord, num_max); 97 while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-') 98 leSen++; 99 } 100 } 101 102 103 //轉化句子 104 len = strlen(FromWord); 105 at = 0; 106 for (i = 0; i < len;) 107 { 108 max_num = -1; 109 memset(strhan, 0, sizeof(strhan)); 110 for (j = i; j <= len; j+=2) 111 { 112 memset (GetWord, 0, sizeof(GetWord)); 113 s = 0; 114 num = -10; 115 //記錄漢字 116 for (k = i; k < j; k++) 117 strhan[s++] = FromWord [k]; 118 119 //轉碼 120 HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord)); 121 122 if(strlen(strhan) != 0) 123 num = findNum(GetWord, T); 124 else 125 continue; 126 if (num > max_num) 127 { 128 max_num = num; 129 memset(hanMax, 0, sizeof(hanMax)); 130 strcpy(hanMax, strhan); 131 from = i; 132 to =j; 133 } 134 } 135 if(max_num != -1) 136 { 137 while(at < from) 138 { 139 //fputc(FromWord [at], fp); 140 Tostr[lenTostr++] = FromWord[at++]; 141 } 142 memset (GetWord, 0, sizeof(GetWord)); 143 memset(z_Str, 0, sizeof(z_Str)); 144 HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord)); 145 findStr(GetWord, T); 146 lenZ_Str = strlen(z_Str); 147 for(f = 0; f < lenZ_Str ;f++) 148 Tostr[lenTostr++] = z_Str[f]; 149 at = to; 150 i = to; 151 } 152 else 153 { 154 for(j = at; j < at+2; j++) 155 Tostr[lenTostr++] = FromWord[j]; 156 // fputc(FromWord[j], fp); 157 at += 2; 158 i += 2; 159 } 160 } 161 return 0; 162 } 163 164 void insertTree(char *str, node *T, char *MaxWord, int num_max) 165 { 166 int len, i, j, flag=0, id; 167 node *p, *q; 168 p = T; 169 len = strlen(str); 170 for (i = 0; i < len; i++) 171 { 172 if(str[i]>= 'a' && str[i] <= 'f')//當時abcdef時 轉化為數字 173 id = str[i]- 'a' + 10; 174 else 175 id = str[i] - '0'; 176 if( p ->next[id] == NULL)//擴展節點 177 { 178 flag = 1; 179 q = (node *)malloc(sizeof(node)); 180 memset(q->MWord, 0, sizeof(q->MWord)); 181 q->order = -1; 182 for(j = 0;j < 16 ;j++) 183 q ->next[j] = NULL; 184 p->next[id] = q; 185 } 186 p = p->next[id]; 187 } 188 if(flag) 189 { 190 strcpy(p->MWord, MaxWord); 191 p->order = num_max; 192 } 193 else 194 { 195 if( p -> order == -1) 196 { 197 strcpy(p->MWord, MaxWord); 198 p->order = num_max ; 199 } 200 } 201 } 202 203 void findStr(char *str, node *T) 204 { 205 int len , i, id; 206 node *p; 207 p = T; 208 len = strlen(str); 209 for (i=0; i< len ; ++i) 210 { 211 if(str[i]>= 'a' && str[i] <= 'f') 212 id = str[i]- 'a' + 10; 213 else 214 id = str[i] - '0'; 215 if(p->next[id] == NULL) 216 return; 217 p = p->next[id]; 218 } 219 strcpy(z_Str, p->MWord); 220 } 221 222 223 int findNum(char *str, node *T) 224 { 225 int len, i, id; 226 node *p; 227 p = T; 228 len = strlen(str); 229 for(i = 0; i < len; i++) 230 { 231 if(str[i]>= 'a' && str[i] <= 'f') 232 id = str[i]- 'a' + 10; 233 else 234 id = str[i] - '0'; 235 if(p->next[id] == NULL) 236 return -1; 237 p = p->next[id]; 238 } 239 return p->order; 240 }
1 #include<stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #include "lyPublic/lyCodeConvert.h" 5 #define X_LONGSEN 500 6 #define Y_LONGWORD 100 7 struct node 8 { 9 char MWord[Y_LONGWORD]; //對應的最高權權值 10 int order; //權值、並作為標記是否有詞 11 struct node *next[16]; 12 }; 13 14 typedef struct node node; 15 char z_Str[Y_LONGWORD]; 16 17 void insertTree(char *str, node *T, char *MaxWord, int num_max); 18 void findStr(char *str, node *T); 19 int findNum(char *str, node *T); 20 int SentenceTransform(char *FromWord,char *Tostr); 21 22 /* 23 函數功能: 24 將一句話里面的部分詞 轉化為 權值最高的詞 25 變量說明: 26 FromWord 原句子 27 Tostr 轉化后的句子 28 */ 29 30 int SentenceTransform(char *FromWord,char *Tostr) 31 { 32 //FILE *fp; 33 FILE *fq; 34 char GetSentence[X_LONGSEN] = ""; // 讀取權值文檔的句子 35 int lenGetSen, leWord, leSen; 36 char GetWord[Y_LONGWORD] = "" ; 37 char ToWord[Y_LONGWORD] = ""; 38 char strhan[Y_LONGWORD] = ""; 39 char MaxWord[Y_LONGWORD] = ""; //每一句的權值最高詞 40 char hanMax[Y_LONGWORD] = "" ; 41 node *T; 42 int i, j, k, len, s, num, max_num, f; 43 int from, to, at; 44 int num_max; 45 int lenTostr = 0 , lenZ_Str; 46 47 T = (node *)malloc(sizeof(node)); 48 //初始化節點 49 memset (T->MWord, 0 ,sizeof(T->MWord)); 50 T->order = -1; 51 for(i = 0; i < 16; i++) 52 T->next[i] = NULL; 53 54 55 /* 56 打開權值文檔,文檔格式: 57 平凡28&平淡--62 平庸--5 平凡--82 平常--38] 58 平常83&平常--38 尋常--31] 59 貧困24&困頓--0 貧困--42 窘迫--4] 60 貧困24&貧困--42 貧寒--0 清貧--31 貧窮--7 窮苦--1] 61 說明: 62 第一個是一句話權值最高的詞,緊跟着的就是權值 63 & 是分隔符 64 后面的是 近義詞 的詞和氣權值大小 65 */ 66 67 68 69 // 讀取權值文檔,建立字典樹 70 fq = fopen ("1.txt","r++"); 71 // fp = fopen ("jieguo.txt","w+r"); 72 while (fgets (GetSentence, 500, fq) != NULL) //讀取權值文檔,建立各個詞對應的最高權值 73 { 74 lenGetSen = strlen(GetSentence); 75 leSen = 0; 76 memset (MaxWord, 0, sizeof(MaxWord)); 77 leWord = 0; 78 leWord = 0; 79 while(GetSentence[leSen] != '&' && (GetSentence[leSen] < '0' || GetSentence[leSen] >'9')) 80 MaxWord[leWord++] = GetSentence[leSen++]; 81 //取最高權詞的權值 82 num_max = 0; 83 while(GetSentence[leSen] >= '0' && GetSentence[leSen] <= '9') 84 num_max = num_max*10 + GetSentence[leSen++] - '0'; 85 leSen++; 86 while (GetSentence[leSen] != ']' && leSen < lenGetSen) 87 { 88 memset (GetWord, 0, sizeof(GetWord)); 89 memset (ToWord, 0, sizeof(ToWord)); 90 leWord = 0; 91 while (GetSentence[leSen] != '-') 92 { 93 GetWord[leWord++] = GetSentence[leSen++]; 94 } 95 HanziToAnsi (GetWord, strlen(GetWord), ToWord, sizeof(ToWord)); 96 insertTree (ToWord, T, MaxWord, num_max); 97 while (GetSentence[leSen] == ' ' || ( GetSentence[leSen] >='0' && GetSentence[leSen] <= '9') || GetSentence[leSen] == '-') 98 leSen++; 99 } 100 } 101 102 103 //轉化句子 104 len = strlen(FromWord); 105 at = 0; 106 for (i = 0; i < len;) 107 { 108 max_num = -1; 109 memset(strhan, 0, sizeof(strhan)); 110 for (j = i; j <= len; j+=2) 111 { 112 memset (GetWord, 0, sizeof(GetWord)); 113 s = 0; 114 num = -10; 115 //記錄漢字 116 for (k = i; k < j; k++) 117 strhan[s++] = FromWord [k]; 118 119 //轉碼 120 HanziToAnsi(strhan, strlen(strhan), GetWord, sizeof(GetWord)); 121 122 if(strlen(strhan) != 0) 123 num = findNum(GetWord, T); 124 else 125 continue; 126 if (num > max_num) 127 { 128 max_num = num; 129 memset(hanMax, 0, sizeof(hanMax)); 130 strcpy(hanMax, strhan); 131 from = i; 132 to =j; 133 } 134 } 135 if(max_num != -1) 136 { 137 while(at < from) 138 { 139 //fputc(FromWord [at], fp); 140 Tostr[lenTostr++] = FromWord[at++]; 141 } 142 memset (GetWord, 0, sizeof(GetWord)); 143 memset(z_Str, 0, sizeof(z_Str)); 144 HanziToAnsi(hanMax, strlen(hanMax), GetWord, sizeof(GetWord)); 145 findStr(GetWord, T); 146 lenZ_Str = strlen(z_Str); 147 for(f = 0; f < lenZ_Str ;f++) 148 Tostr[lenTostr++] = z_Str[f]; 149 at = to; 150 i = to; 151 } 152 else 153 { 154 for(j = at; j < at+2; j++) 155 Tostr[lenTostr++] = FromWord[j]; 156 // fputc(FromWord[j], fp); 157 at += 2; 158 i += 2; 159 } 160 } 161 return 0; 162 } 163 164 void insertTree(char *str, node *T, char *MaxWord, int num_max) 165 { 166 int len, i, j, flag=0, id; 167 node *p, *q; 168 p = T; 169 len = strlen(str); 170 for (i = 0; i < len; i++) 171 { 172 if(str[i]>= 'a' && str[i] <= 'f')//當時abcdef時 轉化為數字 173 id = str[i]- 'a' + 10; 174 else 175 id = str[i] - '0'; 176 if( p ->next[id] == NULL)//擴展節點 177 { 178 flag = 1; 179 q = (node *)malloc(sizeof(node)); 180 memset(q->MWord, 0, sizeof(q->MWord)); 181 q->order = -1; 182 for(j = 0;j < 16 ;j++) 183 q ->next[j] = NULL; 184 p->next[id] = q; 185 } 186 p = p->next[id]; 187 } 188 if(flag) 189 { 190 strcpy(p->MWord, MaxWord); 191 p->order = num_max; 192 } 193 else 194 { 195 if( p -> order == -1) 196 { 197 strcpy(p->MWord, MaxWord); 198 p->order = num_max ; 199 } 200 } 201 } 202 203 void findStr(char *str, node *T) 204 { 205 int len , i, id; 206 node *p; 207 p = T; 208 len = strlen(str); 209 for (i=0; i< len ; ++i) 210 { 211 if(str[i]>= 'a' && str[i] <= 'f') 212 id = str[i]- 'a' + 10; 213 else 214 id = str[i] - '0'; 215 if(p->next[id] == NULL) 216 return; 217 p = p->next[id]; 218 } 219 strcpy(z_Str, p->MWord); 220 } 221 222 223 int findNum(char *str, node *T) 224 { 225 int len, i, id; 226 node *p; 227 p = T; 228 len = strlen(str); 229 for(i = 0; i < len; i++) 230 { 231 if(str[i]>= 'a' && str[i] <= 'f') 232 id = str[i]- 'a' + 10; 233 else 234 id = str[i] - '0'; 235 if(p->next[id] == NULL) 236 return -1; 237 p = p->next[id]; 238 } 239 return p->order; 240 }
主函數:
#include<stdio.h> #include<string.h> #include "AnalysisWord.h" int main() { char strGetFromWeb[500] = ""; char strGetToWeb[500] = ""; while(gets(strGetFromWeb)) { SentenceTransform(strGetFromWeb,strGetToWeb); puts(strGetToWeb); memset(strGetToWeb, 0, sizeof(strGetToWeb)); } return 0; }