正規表達式 轉 NFA C++


 

  今天來為大家分享一個編譯原理中用正規表達式轉NFA的小程序

正規表達式就是類似正則一樣的式子,例如:(a|b)*abb,最后應該轉化為:

 

大致的處理流程為:

例子中的表達式:(a|b)*abb,|和*都是運算法則,而且容易識別,但是處理abb就不是你那么方便了,所以我們在abb中間加上+號,就可以像|*那樣識別了,所以處理后為(a|b)*a+b+b

 

 我們識別出來之后,首先根據書中提供的運算符->NFA部件的圖轉化為NFA部件,之后再根據優先級和各個部件組建NFA

 

 運算符對應NFA中的各個部件圖為:

ε符:

        

φ符:

        

輸入符號:

        

| 符:      

  

+符:

    

*符:

    

 

 有一個問題,NFA的開始和終止都是狀態集合,但是整改了好多次,沒能設計出來,所以該程序產生的NFA均只有一個開始狀態和一個終止狀態

 

代碼注釋挺清楚的,我就不過多描述了

 

G[S]->NFA代碼如下:

#ifndef _GNFA_
#define _GNFA_

/*
@author:Lv
@time:2018-11
@title:正規式轉NFA
*/

#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <set>

namespace GNFAs
    {
#define stds std::

    /*
    @brief 記錄NFA每一個狀態
    @member 記錄名稱
    */
    struct state        
        {
        stds string _staName;
        //state(const stds string& name = "#") :_staName(name) {  }
        bool operator<(const state& b)const { return _staName < b._staName; }
        };

    /*
    @brief 記錄狀態之間轉換的邊信息
    @member 起始狀態、終止狀態、狀態轉換輸入符號
    */
    struct edge { state _edgStart; state _edgEnd; char _edgSymbol; }; /*
    NFA-class
    */
    class NFA
        {
    public:
        using ostream = stds ostream;
        using istream = stds istream;
        using exptype = stds string;
        using map_sta = stds vector<edge>;
        using container_sta = stds set<state>;
        using container_sym = stds set<char>;
    public:
        /*
        @brief nfaunit 用於記錄NFA數據集合--NFA基本數據類型--NFA單元
        @member K 狀態集合
                Σ 字母表
                f 狀態映射        
                S 開始狀態
                Z 終止狀態集合
        */
        typedef struct NFAunit
            {
            container_sta K;
            container_sym Σ;
            map_sta f;
            state S;
            state Z;    

            NFAunit()
                {
                f.clear();
                K.clear();
                Σ.clear();
                Z = state();
                S = state();
                }

            NFAunit(const NFAunit& other)
                :K(other.K)
                ,Σ(other.Σ)
                ,f(other.f)
                ,S(other.S)
                ,Z(other.Z)
                {

                }

            NFAunit& operator=(const NFAunit& other)
                {
                if (this != &other)
                    {
                    K = other.K;
                    Σ = other.Σ;
                    f = other.f;
                    S = other.S;
                    Z = other.Z;
                    }
                return *this;
                }

            NFAunit& operator+=(const NFAunit& other)
                {
                K.insert(other.K.begin(), other.K.end());
                Σ.insert(other.Σ.begin(), other.Σ.end());
                f.insert(f.end(), other.f.begin(), other.f.end());
                return *this;
                }
            } _type_;
        
    public:
        NFA();

        NFA(const NFA&);

        NFA& operator=(const NFA&);

        _type_ getNFA()const { return _data; }

        exptype getExpression()const { return _expression; }


    public:
        /*
        @brief 輸入正規式
        */
        void input();

        /*
        @brief 轉成NFA
        */
        void toNFA();

        /*
        @brief 展示NFA
        */
        void show()const;

        /*
        @brief 刷新數據
        */
        void update();

        /*
        @brief 運行
        */
        void run();

    private:
        /*
        @brief 檢查正規式是否合法
        @retur 是否合法
        */
        bool _checkExp()const;

        /*
        @brief 檢查正規式字符是否合法
        @retur 是否合法
        */
        bool _checkSym()const;

        /*
        @brief 檢查正規式語法,如:括號匹配
        @retur 是否合法
        */
        bool _checkSync()const;

        /*
        @brief 做一些處理便於表達式轉換
        */
        void change();

        /*
        @brief 中綴轉后綴
        */
        void postexp();

        /*
        @brief 棧內優先級
        */
        int inpriority(const char)const;

        /*
        @brief 棧外優先級
        */
        int outpriority(const char)const;

        /*
        @brief 整合a|b
        */
        _type_ _or(_type_, _type_);

        /*
        @brief 整合ab
        */
        _type_ _mul(_type_, _type_);

        /*
        @brief 整合a*
        */
        _type_ _star(_type_);

        /*
        @brief 整合單元
        */
        _type_ _unit(const char);

    private:
        int _staNum;
        ostream& _out;
        istream& _in;
        NFAunit _data;
        exptype _expression;
        };
    }

#endif //_GNFA_

 

#include "GNFA.h"
using namespace GNFAs;

#include <stack>

using stack_unit = stds stack<NFA::NFAunit>;
using stack_char = stds stack<char>;

#define enter stds endl

NFA::NFA()
    :_staNum(0)
    , _out(std::cout)
    , _in(std::cin)
    {    }

void NFA::input()
{
    _out << "請輸入正規式:" << enter;

    while (!_checkExp())        _in >> _expression;
}

bool NFA::_checkExp()const
{
    if (!_checkSym())
    {
        _out << "含有非法字符!" << enter;
        return false;
    }
    if (!_checkSync())
    {
        _out << "含有語法錯誤!" << enter;
        return false;
    }
    return _expression != exptype();
}

bool NFA::_checkSym()const
{
    for (int i = 0; i < _expression.size(); ++i)
    {
        if (islower(_expression[i]))    continue;
        else if (_expression[i] == '(' || _expression[i] == ')' || _expression[i] == '*' || _expression[i] == '|')
            continue;
        else
            return false;
    }
    return true;
}

bool NFA::_checkSync()const
{
    stack_char stack;
    for (int i = 0; i < _expression.size(); ++i)
    {
        if (_expression[i] == '(')
            stack.push('(');
        else if (_expression[i] == ')')
        {
            if (stack.size() && stack.top() == '(')
                stack.pop();
            else return false;
        }
        if (_expression[i] == '*')
        {
            if (i &&_expression[i - 1] != '|')
                continue;
            else return false;
        }
    }
    if (stack.size())return false;
    return true;
}

void NFA::change()
{
    exptype t;

    char s, e;

    for (int i = 0; i < _expression.size(); ++i)
    {
        s = _expression[i];
        e = _expression[i + 1];
        t += s;

        if (s != '(' && s != '|' && islower(e))        t += '+';
        else if (e == '(' && s != '|' && s != '(')    t += '+';
    }
    t += e;
    _expression = t;
}

int NFA::inpriority(const char c)const
{
    switch (c)
    {
    case '#': return 0;
    case '(': return 1;
    case '*': return 7;
    case '|': return 5;
    case '+': return 3;
    case ')': return 8;
    }
    return -1;
}

int NFA::outpriority(const char c)const
{
    switch (c)
    {
    case '#': return 0;
    case '(': return 8;
    case '*': return 6;
    case '|': return 4;
    case '+': return 2;
    case ')': return 1;
    }
    return -1;
}

void NFA::postexp()
{
    _expression += '#';
    exptype t = "";
    stack_char s;
    char ch = '#', ch1, op;
    s.push(ch);

    //讀一個字符
    int read_location = 0;
    ch = _expression.at(read_location++);
    while (!s.empty())
    {
        if (islower(ch))
        {
            t += ch;
            ch = _expression.at(read_location++);
        }
        else
        {
            ch1 = s.top();
            if (inpriority(ch1)<outpriority(ch))
            {
                s.push(ch);
                ch = _expression.at(read_location++);
            }
            else if (inpriority(ch1)>outpriority(ch))
            {
                op = s.top();
                s.pop();
                t += op;
            }
            else 
            {
                op = s.top();
                s.pop();

                if (op == '(')
                    ch = _expression.at(read_location++);
            }
        }
    }
    t.erase(t.end() - 1);
    _expression = t;
}

void NFA::toNFA()
{
    char item;
    _type_ left, right;
    stack_unit stack;

    for (int i = 0; i < _expression.size(); ++i)
    {
        item = _expression[i];
        switch (item)
        {
        case '|':
            right = stack.top();
            stack.pop();
            left = stack.top();
            stack.pop();
            _data = _or(left, right);
            stack.push(_data);
            break;
        case '*':
            left = stack.top();
            stack.pop();
            _data = _star(left);
            stack.push(_data);
            break;
        case '+':
            right = stack.top();
            stack.pop();
            left = stack.top();
            stack.pop();
            _data = _mul(left, right);
            stack.push(_data);
            break;
        default:
            _data = _unit(item);
            stack.push(_data);
        }
    }

    _data = stack.top();
    stack.pop();
}

NFA::_type_ NFA::_or(_type_ unitl, _type_ unitr)
{
    _type_ unit;
    exptype name;
    edge e1, e2, e3, e4;

    state start { name += _staNum++ + 'A' };
    name = "";
    state end{ name += _staNum++ + 'A' };

    e1._edgStart = start;
    e1._edgEnd = unitl.f[0]._edgStart;
    e1._edgSymbol = '#';

    e2._edgStart = start;
    e2._edgEnd = unitr.f[0]._edgStart;
    e2._edgSymbol = '#';

    e3._edgStart = unitl.f[unitl.f.size() - 1]._edgEnd;
    e3._edgEnd = end;
    e3._edgSymbol = '#';

    e4._edgStart = unitr.f[unitr.f.size() - 1]._edgEnd;
    e4._edgEnd = end;
    e4._edgSymbol = '#';

    unit = unitl;
    unit += unitr;
    unit.f.push_back(e1);
    unit.f.push_back(e2);
    unit.f.push_back(e3);
    unit.f.push_back(e4);

    unit.S = start;
    unit.Z = end;

    return unit;
}

NFA::_type_ NFA::_mul(_type_ unitl, _type_ unitr)
{
    for (auto &it : unitr.f)
    {
        if (it._edgStart._staName == unitr.S._staName)
        {
            it._edgStart = unitl.Z;
            _staNum--;
        }
        else if (it._edgEnd._staName == unitr.S._staName)
        {
            it._edgEnd = unitl.Z;
            _staNum--;
        }
    }
    unitr.S = unitl.Z;
    unitl += unitr;
    unitl.Z = unitr.Z;

    return unitl;
}

NFA::_type_ NFA::_star(_type_ u)
{
    _type_ unit;
    exptype name;
    edge e1, e2, e3, e4;

    state start{ name += _staNum++ + 'A' };
    name = "";
    state end{ name += _staNum++ + 'A' };

    e1._edgStart = start;
    e1._edgEnd = end;
    e1._edgSymbol = '#';

    e2._edgStart = u.Z;
    e2._edgEnd = u.S;
    e2._edgSymbol = '#';

    e3._edgStart = start;
    e3._edgEnd = u.Z;
    e3._edgSymbol = '#';

    e4._edgStart = u.Z;
    e4._edgEnd = start;
    e4._edgSymbol = '#';

    unit = u;

    unit.f.push_back(e1);
    unit.f.push_back(e2);
    unit.f.push_back(e3);
    unit.f.push_back(e4);

    unit.S = start;
    unit.Z = end;

    return unit;
}

NFA::_type_ NFA::_unit(const char ch)
{
    _type_ unit;
    exptype name;
    edge e;

    state start{  name += _staNum++ + 'A' };
    name = "";
    state end{ name += _staNum++ + 'A' };

    e._edgStart = start;
    e._edgEnd = end;
    e._edgSymbol = ch;

    unit.f.push_back(e);
    unit.S = start;
    unit.Z = end;

    return unit;
}

void NFA::show()const
{
    _out << "NFA 的起始狀態:" << _data.S._staName << enter;
    _out << "NFA 的結束狀態:" << _data.Z._staName << enter << enter;

    for (auto it : _data.f)
    {
        _out << "from state:  " << it._edgStart._staName
            << "\t    to state: " << it._edgEnd._staName
            << "\t\tby  ";
        if (it._edgSymbol == '#') _out << R"+(ε)+" << enter;
        else _out << it._edgSymbol << enter;
    }
    _out << enter;
}

void NFA::update()
{
    for (auto it : _data.f)
    {
        _data.K.insert(it._edgStart);
        _data.K.insert(it._edgEnd);
        _data.Σ.insert(it._edgSymbol);
    }
}

void NFA::run()
{
    input();

    change();

    postexp();

    toNFA();

    show();

    update();
}

 

 

#include "GNFA.h"
using namespace GNFAs;

int main()
{
    NFA nfa;
    nfa.run();
}

 

 測試結果:

 

 

 感謝您的閱讀,生活愉快~

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM