今天來為大家分享一個編譯原理中用正規表達式轉NFA的小程序
正規表達式就是類似正則表達式的式子,例如:(a|b)*abb,最後應該轉化為如下的NFA:
大致的處理流程為:
例子中的表達式:(a|b)*abb,|和*都是運算符,而且容易識別,但是連接運算沒有顯式的符號,處理abb就不是那麼方便了,所以我們在相鄰的兩個運算對象之間加上+號表示連接,就可以像|和*那樣識別了,所以處理後為(a|b)*+a+b+b
我們識別出來之后,首先根據書中提供的運算符->NFA部件的圖轉化為NFA部件,之后再根據優先級和各個部件組建NFA
運算符對應NFA中的各個部件圖為:
ε符:
φ符:
輸入符號:
| 符:
+符:
*符:
有一個問題,NFA的開始和終止都是狀態集合,但是整改了好多次,沒能設計出來,所以該程序產生的NFA均只有一個開始狀態和一個終止狀態
代碼注釋挺清楚的,我就不過多描述了
G[S]->NFA代碼如下:
#ifndef GNFA_H_
#define GNFA_H_

/*
    @author: Lv
    @time:   2018-11
    @title:  Regular expression to NFA conversion
*/

#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <set>

namespace GNFAs
{
// Shorthand for the std:: qualifier, used throughout this header as `stds name`.
#define stds std::

    /*
        @brief  One NFA state.
        @member _staName  name of the state
    */
    struct state
    {
        stds string _staName;

        //state(const stds string& name = "#") :_staName(name) { }

        // Orders states by name, which is what lets them live in a std::set.
        bool operator<(const state& b)const
        {
            return _staName < b._staName;
        }
    };

    /*
        @brief  One transition (edge) between two states.
        @member _edgStart   state the edge leaves
        @member _edgEnd     state the edge enters
        @member _edgSymbol  input symbol that triggers the transition
    */
    struct edge
    {
        state _edgStart;
        state _edgEnd;
        char _edgSymbol;
    };

    /*
        NFA class: reads a regular expression and builds an NFA from it.
    */
    class NFA
    {
    public:
        using ostream = stds ostream;
        using istream = stds istream;
        using exptype = stds string;
        using map_sta = stds vector<edge>;
        using container_sta = stds set<state>;
        using container_sym = stds set<char>;

    public:
        /*
            @brief  Basic NFA data record; every partial NFA ("unit") built
                    during the conversion has this shape.
            @member K  set of states
            @member Σ  alphabet (set of symbols)
            @member f  transition edges
            @member S  start state
            @member Z  final state (a single state, not a set)

            No constructors or copy operations are declared: every member is a
            standard container or a plain struct, so the compiler-generated
            default/copy/move operations already do the right thing.
        */
        struct NFAunit
        {
            container_sta K;
            container_sym Σ;
            map_sta f;
            state S;
            state Z;

            /*
                @brief  Merges the states, symbols and edges of `other` into
                        this unit. S and Z are left untouched.
            */
            NFAunit& operator+=(const NFAunit& other)
            {
                K.insert(other.K.begin(), other.K.end());
                Σ.insert(other.Σ.begin(), other.Σ.end());
                f.insert(f.end(), other.f.begin(), other.f.end());
                return *this;
            }
        };
        using _type_ = NFAunit;

    public:
        NFA();

        // NOTE(review): the copy operations are only declared here; copying an
        // NFA links only if the implementation file defines them.
        NFA(const NFA&);
        NFA& operator=(const NFA&);

        /* @brief  Returns a copy of the NFA built so far. */
        _type_ getNFA()const { return _data; }

        /* @brief  Returns a copy of the stored expression string. */
        exptype getExpression()const { return _expression; }

    public:
        /*
            @brief  Read the regular expression
        */
        void input();

        /*
            @brief  Convert the expression to an NFA
        */
        void toNFA();

        /*
            @brief  Display the NFA
        */
        void show()const;

        /*
            @brief  Refresh the derived data
        */
        void update();

        /*
            @brief  Run the whole program
        */
        void run();

    private:
        /*
            @brief   Check whether the regular expression is valid
            @return  true when it is valid
        */
        bool _checkExp()const;

        /*
            @brief   Check whether every character of the expression is allowed
            @return  true when it is valid
        */
        bool _checkSym()const;

        /*
            @brief   Check the syntax of the expression, e.g. bracket matching
            @return  true when it is valid
        */
        bool _checkSync()const;

        /*
            @brief  Pre-process the expression to make the conversion easier
        */
        void change();

        /*
            @brief  Infix to postfix conversion
        */
        void postexp();

        /*
            @brief  Priority of an operator that is already on the stack
        */
        int inpriority(const char)const;

        /*
            @brief  Priority of an operator that is about to enter the stack
        */
        int outpriority(const char)const;

        /*
            @brief  Combine two units as a|b
        */
        _type_ _or(_type_, _type_);

        /*
            @brief  Combine two units as ab
        */
        _type_ _mul(_type_, _type_);

        /*
            @brief  Wrap a unit as a*
        */
        _type_ _star(_type_);

        /*
            @brief  Build the unit for a single input symbol
        */
        _type_ _unit(const char);

    private:
        int _staNum;          // counter used to name newly created states
        ostream& _out;        // output stream
        istream& _in;         // input stream
        NFAunit _data;        // the NFA produced by the conversion
        exptype _expression;  // the expression being processed
    };
}

#endif // GNFA_H_
#include "GNFA.h"

#include <cctype>
#include <cstddef>
#include <stack>
#include <string>

using namespace GNFAs;

using stack_unit = std::stack<NFA::NFAunit>;
using stack_char = std::stack<char>;

namespace
{
    // Symbol stored on empty (epsilon) transitions; show() prints it as ε.
    const char kEpsilon = '#';
    // End-of-input marker and bottom-of-stack marker used by postexp().
    const char kEndMark = '#';
    // Explicit concatenation operator inserted by change().
    const char kConcat = '+';

    // True for the operand characters of the expression (lowercase letters).
    // The cast avoids undefined behaviour for negative char values.
    bool isOperand(const char c)
    {
        return std::islower(static_cast<unsigned char>(c)) != 0;
    }

    // Creates a state with a fresh name and advances the counter.
    // Names are A..Z for the first 26 states, then A1..Z1, A2..Z2, ...
    state makeState(int& counter)
    {
        const int index = counter++;
        state created;
        created._staName = std::string(1, static_cast<char>('A' + index % 26));
        if (index >= 26)
            created._staName += std::to_string(index / 26);
        return created;
    }

    // Builds an edge from its three fields.
    edge makeEdge(const state& from, const state& to, const char symbol)
    {
        edge created;
        created._edgStart = from;
        created._edgEnd = to;
        created._edgSymbol = symbol;
        return created;
    }

    // Pops the top unit into `out`; returns false when the stack is empty.
    bool popUnit(stack_unit& units, NFA::NFAunit& out)
    {
        if (units.empty())
            return false;
        out = units.top();
        units.pop();
        return true;
    }
}

/* @brief  Binds the object to std::cout / std::cin and starts state naming at 'A'. */
NFA::NFA()
    : _staNum(0)
    , _out(std::cout)
    , _in(std::cin)
{
}

/* @brief  Copies counter, NFA data and expression; shares the same streams. */
NFA::NFA(const NFA& other)
    : _staNum(other._staNum)
    , _out(other._out)
    , _in(other._in)
    , _data(other._data)
    , _expression(other._expression)
{
}

/*
    @brief  Copies counter, NFA data and expression. The stream members are
            references and cannot be re-bound, so they are left as they are.
*/
NFA& NFA::operator=(const NFA& other)
{
    if (this != &other)
    {
        _staNum = other._staNum;
        _data = other._data;
        _expression = other._expression;
    }
    return *this;
}

/*
    @brief  Prompts for a regular expression and keeps reading until a valid
            one is entered. If the input stream ends or fails first, the
            stored expression is cleared and the function returns (the
            original looped forever in that case).
*/
void NFA::input()
{
    _out << "請輸入正規式:" << std::endl;
    while (!_checkExp())
    {
        if (!(_in >> _expression))
        {
            _expression.clear();
            return;
        }
    }
}

/*
    @brief   Validates the stored expression, printing a message for illegal
             characters or syntax errors.
    @return  true when the expression is non-empty and valid
*/
bool NFA::_checkExp()const
{
    if (!_checkSym())
    {
        _out << "含有非法字符!" << std::endl;
        return false;
    }
    if (!_checkSync())
    {
        _out << "含有語法錯誤!" << std::endl;
        return false;
    }
    return !_expression.empty();
}

/*
    @brief   Checks that the expression only contains lowercase letters and
             the characters ( ) * |
    @return  true when every character is allowed
*/
bool NFA::_checkSym()const
{
    for (const char c : _expression)
    {
        const bool isOperator = c == '(' || c == ')' || c == '*' || c == '|';
        if (!isOperand(c) && !isOperator)
            return false;
    }
    return true;
}

/*
    @brief   Checks the syntax of the expression so that the later stages never
             meet an operator without its operands:
             - brackets must match and must not be empty,
             - '*' and '|' need a complete operand on their left
               (a letter, ')' or '*'),
             - '|' needs an operand on its right, so it cannot be followed by
               ')', '|', '*' or the end of the expression.
    @return  true when the syntax is valid (an empty expression passes here;
             _checkExp() rejects it separately)
*/
bool NFA::_checkSync()const
{
    int depth = 0;
    char previous = '\0';   // '\0' stands for "start of expression"
    for (const char c : _expression)
    {
        const bool noLeftOperand =
            previous == '\0' || previous == '(' || previous == '|';
        switch (c)
        {
        case '(':
            ++depth;
            break;
        case ')':
            // Unmatched ')', empty "()" or dangling "|)".
            if (depth == 0 || previous == '(' || previous == '|')
                return false;
            --depth;
            break;
        case '*':
        case '|':
            if (noLeftOperand)
                return false;
            break;
        default:
            break;
        }
        previous = c;
    }
    return depth == 0 && previous != '|';
}

/*
    @brief  Makes concatenation explicit: inserts '+' between two adjacent
            characters whenever the left one ends an operand (anything but
            '(' and '|') and the right one begins an operand (a letter or '(').
            Example: (a|b)*abb becomes (a|b)*+a+b+b
*/
void NFA::change()
{
    exptype expanded;
    const std::size_t length = _expression.size();
    for (std::size_t i = 0; i < length; ++i)
    {
        const char current = _expression[i];
        expanded += current;
        if (i + 1 == length)
            break;  // last character: nothing to its right

        const char next = _expression[i + 1];
        const bool leftComplete = current != '(' && current != '|';
        const bool rightBegins = isOperand(next) || next == '(';
        if (leftComplete && rightBegins)
            expanded += kConcat;
    }
    _expression = expanded;
}

/*
    @brief   Priority of an operator that is already on the operator stack.
             Binding strength is '*' > '+' (concatenation) > '|'.
    @return  the priority, or -1 for an unknown character
*/
int NFA::inpriority(const char c)const
{
    switch (c)
    {
    case '#': return 0;
    case '(': return 1;
    case '|': return 3;
    case '+': return 5;
    case '*': return 7;
    case ')': return 8;
    }
    return -1;
}

/*
    @brief   Priority of an operator that is about to be pushed. Each binary
             operator gets a value one below its in-stack priority so equal
             operators are popped first (left associativity).
    @return  the priority, or -1 for an unknown character
*/
int NFA::outpriority(const char c)const
{
    switch (c)
    {
    case '#': return 0;
    case ')': return 1;
    case '|': return 2;
    case '+': return 4;
    case '*': return 6;
    case '(': return 8;
    }
    return -1;
}

/*
    @brief  Rewrites the stored infix expression (with explicit '+') as a
            postfix expression, using an operator stack and the two priority
            tables. '#' marks both the bottom of the stack and the end of the
            input; the conversion ends when the two meet.
*/
void NFA::postexp()
{
    const exptype infix = _expression + kEndMark;
    exptype postfix;
    stack_char operators;
    operators.push(kEndMark);

    std::size_t position = 0;
    while (!operators.empty() && position < infix.size())
    {
        const char current = infix[position];
        if (isOperand(current))
        {
            postfix += current;
            ++position;
            continue;
        }

        const int onStack = inpriority(operators.top());
        const int incoming = outpriority(current);
        if (onStack < incoming)
        {
            operators.push(current);
            ++position;
        }
        else if (onStack > incoming)
        {
            postfix += operators.top();
            operators.pop();
        }
        else
        {
            // Equal priority: '(' meets ')' or the two '#' markers meet.
            const char matched = operators.top();
            operators.pop();
            if (matched == '(')
                ++position;
        }
    }
    _expression = postfix;
}

/*
    @brief  Evaluates the stored postfix expression with a stack of NFA units
            and stores the result in _data. If the postfix expression is
            malformed (an operator without enough operands, or operands left
            over), _data is reset to an empty unit instead of reading from an
            empty stack.
*/
void NFA::toNFA()
{
    stack_unit units;
    _type_ left, right;
    bool wellFormed = true;

    for (const char item : _expression)
    {
        switch (item)
        {
        case '|':
            wellFormed = popUnit(units, right) && popUnit(units, left);
            if (wellFormed)
                units.push(_or(left, right));
            break;
        case '*':
            wellFormed = popUnit(units, left);
            if (wellFormed)
                units.push(_star(left));
            break;
        case '+':
            wellFormed = popUnit(units, right) && popUnit(units, left);
            if (wellFormed)
                units.push(_mul(left, right));
            break;
        default:
            units.push(_unit(item));
        }
        if (!wellFormed)
            break;
    }

    if (wellFormed && units.size() == 1)
        _data = units.top();
    else
        _data = _type_();
}

/*
    @brief  Builds the unit for unitl|unitr: a new start state with ε-edges to
            both operands' start states, and ε-edges from both operands' final
            states to a new final state.

            The operands' S and Z are used directly. The first/last entry of f
            is not a reliable substitute, because for a composite operand those
            edges belong to inner sub-units.
*/
NFA::_type_ NFA::_or(_type_ unitl, _type_ unitr)
{
    const state start = makeState(_staNum);
    const state end = makeState(_staNum);

    _type_ unit = unitl;
    unit += unitr;
    unit.f.push_back(makeEdge(start, unitl.S, kEpsilon));
    unit.f.push_back(makeEdge(start, unitr.S, kEpsilon));
    unit.f.push_back(makeEdge(unitl.Z, end, kEpsilon));
    unit.f.push_back(makeEdge(unitr.Z, end, kEpsilon));
    unit.S = start;
    unit.Z = end;
    return unit;
}

/*
    @brief  Builds the unit for unitl unitr (concatenation) by merging the
            start state of unitr into the final state of unitl.

            The state counter is deliberately NOT decremented: the name of the
            merged-away state is simply left unused. Reusing it made later
            states collide with names that were still in use.
*/
NFA::_type_ NFA::_mul(_type_ unitl, _type_ unitr)
{
    const stds string mergedName = unitr.S._staName;
    for (auto& transition : unitr.f)
    {
        // Both ends are checked independently so a self-loop is handled too.
        if (transition._edgStart._staName == mergedName)
            transition._edgStart = unitl.Z;
        if (transition._edgEnd._staName == mergedName)
            transition._edgEnd = unitl.Z;
    }
    unitr.S = unitl.Z;

    unitl += unitr;
    unitl.Z = unitr.Z;
    return unitl;
}

/*
    @brief  Builds the unit for u*: new start and final states plus four
            ε-edges (start→end, u.Z→u.S, start→u.Z, u.Z→start).
*/
NFA::_type_ NFA::_star(_type_ u)
{
    const state start = makeState(_staNum);
    const state end = makeState(_staNum);

    _type_ unit = u;
    unit.f.push_back(makeEdge(start, end, kEpsilon));
    unit.f.push_back(makeEdge(u.Z, u.S, kEpsilon));
    unit.f.push_back(makeEdge(start, u.Z, kEpsilon));
    unit.f.push_back(makeEdge(u.Z, start, kEpsilon));
    unit.S = start;
    unit.Z = end;
    return unit;
}

/*
    @brief  Builds the unit for a single input symbol: two new states joined
            by one edge labelled with that symbol.
*/
NFA::_type_ NFA::_unit(const char ch)
{
    const state start = makeState(_staNum);
    const state end = makeState(_staNum);

    _type_ unit;
    unit.f.push_back(makeEdge(start, end, ch));
    unit.S = start;
    unit.Z = end;
    return unit;
}

/*
    @brief  Prints the start state, the final state and every edge of the NFA.
            Empty transitions are printed as ε.
*/
void NFA::show()const
{
    _out << "NFA 的起始狀態:" << _data.S._staName << std::endl;
    _out << "NFA 的結束狀態:" << _data.Z._staName << std::endl << std::endl;
    for (const auto& transition : _data.f)
    {
        _out << "from state: " << transition._edgStart._staName
            << "\t to state: " << transition._edgEnd._staName
            << "\t\tby ";
        if (transition._edgSymbol == kEpsilon)
            _out << "ε" << std::endl;
        else
            _out << transition._edgSymbol << std::endl;
    }
    _out << std::endl;
}

/*
    @brief  Fills the state set K and the symbol set Σ of the NFA from its
            edges. The ε marker '#' is inserted into Σ like any other symbol.
*/
void NFA::update()
{
    for (const auto& transition : _data.f)
    {
        _data.K.insert(transition._edgStart);
        _data.K.insert(transition._edgEnd);
        _data.Σ.insert(transition._edgSymbol);
    }
}

/*
    @brief  Runs the whole pipeline: read the expression, insert '+', convert
            to postfix, build the NFA, print it and refresh K/Σ. Stops early
            when no valid expression could be read.
*/
void NFA::run()
{
    input();
    if (_expression.empty())
        return;
    change();
    postexp();
    toNFA();
    show();
    update();
}
#include "GNFA.h"

// Entry point: create one converter and run its whole pipeline once.
int main()
{
    GNFAs::NFA converter;
    converter.run();
    return 0;
}
測試結果:
感謝您的閱讀,生活愉快~