哈夫曼編碼系統 C++實現

本文轉載自查看原文 2020-01-04 15:19 1346

最近的數據結構大作業…
其中涉及到了很多，像一些哈夫曼樹的編碼、譯碼，以及樹的二叉樹形式的存儲及恢復。
[基本要求]
一個完整的系統應具有以下功能：
（1）I：初始化（Initialization）。從終端讀入字符集大小n，以及n個字符和n個權值，建立哈夫曼樹，並將它存於文件hfmTree中。
（2）E：編碼（Encoding）。利用已建好的哈夫曼樹（如不在內存，則從文件hfmTree中讀入），對文件ToBeTran中的正文進行編碼，然后將結果存入文件CodeFile中。
（3）D：譯碼（Decoding）。利用已建好的哈夫曼樹將文件CodeFile中的代碼進行譯碼，結果存入文件TextFile中。
（4）P：印代碼文件（Print）。將文件CodeFile以緊湊格式顯示在終端上，每行50個代碼。同時將此字符形式的編碼寫入文件CodePrint中。
（5）T：印哈夫曼樹（Tree Printing）。將已在內存中的哈夫曼樹以直觀的方式（樹或凹入表形式）顯示在終端上，同時將此字符形式的哈夫曼樹寫入文件TreePrint中。

注釋很詳細了，也花了不少時間。這些我當時也是參考了許多他人的資料，希望我這篇博客能夠在總結前人的基礎上，讓大家更好、更綜合地理解這一個實現過程。
我自認為我的編碼風格還是比較容易懂的，函數名字也都是有意義的，如果看下來的話其實不會太吃力，同時很多地方的參考我也在前面列舉了，如果有疑問或者是有問題，歡迎在評論區留言。

參考:

哈夫曼編碼C++實現
二叉樹的文件存儲和讀取
c++按行讀取文件的方式
c++ofstream與c風格fwrite的一個小區別
c++字符串按空格分割
這里是用了頭文件<sstream>處理的，相對簡單一些，但是限於以空格分割，更復雜的請搜索split函數。
上面的空格分割字符串存在只能處理一行的問題，我在其進行了改進
c++ string類型與基本數值類型的互相轉換
c++ 頭文件中stringstream流的用法的一些補充
access函數：檢測文件是否存在/是否具有讀/寫權限
string 與const char*、char*、char[]之間相互轉換
一次讀入整個txt文件到一個string中
在fstream流中新手可能把模式ios::a|ios::b中的"|"寫成"||"，會導致文件無法打開

注：此代碼是在VS2019下運行，因有一些函數可能與標准庫不一樣，如下面的_access函數，如果你在你編譯器上報錯，請把這些帶有"_"的函數的前綴“_”去掉即可。

// 赫夫曼編碼系統.cpp : 此文件包含 "main" 函數。程序執行將在此處開始並結束。
#include<iostream>
#include<fstream>
#include<string>
#include<sstream>
#include <vector>
#include <map>
#include <algorithm>
#include<io.h> //調用access函數確認文件是否存在
#include<windows.h>
using namespace std;

// Names of the files the menu operations read and write.
const string hfmTree{"hfmTree.txt"};          // serialized Huffman tree
const string tobeEncoding{"ToBeTran.txt"};    // plain text to be encoded
const string EncodingResult{"CodeFile.txt"};  // encoded text
const string DecodingResult{"TextFile.txt"};  // decoded text
const string CodePrin{"CodePrin.txt"};        // copy of the code file made by the print option
const string TreePrin{"TreePrin.txt"};        // indented dump of the tree

// Numeric type used for node weights.
using weighttype = double;
// Converts the leading token of `str` to a value of type `Type` via stream extraction.
// Leading whitespace is skipped and anything after the first parsed value is ignored.
// Returns a value-initialized `Type` (0 for arithmetic types) when nothing can be parsed.
template <class Type>
Type stringToNum(const string& str) {
    istringstream iss(str);
    // Value-initialize: when extraction cannot even start (e.g. an empty string) the stream
    // leaves the target untouched, so an uninitialized local would be returned as garbage.
    Type num{};
    iss >> num;
    return num;
}
// One node of the Huffman tree: a symbol, its weight and the two child links.
struct HFMNode
{
    char key;
    weighttype weight;
    HFMNode* left, * right;
    // Leaf constructor: symbol `k` with weight `w`, no children.
    HFMNode(char k, weighttype w) :key(k), weight(w), left(nullptr), right(nullptr) {}
    // Parent constructor, used for the node that joins two subtrees. Such a node carries no
    // symbol, so its key is the NUL character. (The previous '/0' was a multicharacter literal,
    // not NUL.)
    HFMNode(weighttype w) :key('\0'), weight(w), left(nullptr), right(nullptr) {}
};
using HFMNodeP = HFMNode*;
// Lookup table used while restoring a tree: node position -> node pointer.
using NodeMap = map<int, HFMNodeP>;
using Position = int;
// Flat record describing one tree node as it is written to the tree file.
struct HFMNodeFile {
    char key; // symbol stored in the node
    weighttype weight;
    Position p; // position of the node in complete-binary-tree numbering
};
// Heap ordering for make_heap / push_heap / pop_heap.
// The standard heap algorithms keep the element that compares *largest* under the comparator
// at the front, so the comparison is inverted ("greater weight sorts first") to obtain a
// min-heap whose front is always the lightest node.
bool compare(HFMNode* e1, HFMNode* e2) {
    return e1->weight > e2->weight;
}
class HFMTree {
public:
    HFMTree() {
        root = nullptr;
        count = 0;
    }
    ~HFMTree() {
        ClearDecodeTree();
    }
    //建立哈夫曼樹
    HFMNode* BuildHFMTree(const map<char, double>& KVmap) {
        vector<HFMNode*> HFMNodes;
        for (auto itr = KVmap.begin(); itr != KVmap.end(); ++itr) {
            HFMNodes.push_back(new HFMNode(itr->first, itr->second));
            ++count;
        }

        make_heap(HFMNodes.begin(), HFMNodes.end(), compare);

        while (HFMNodes.size() > 1) {
            HFMNode* right = HFMNodes.front();
            pop_heap(HFMNodes.begin(), HFMNodes.end(), compare);
            HFMNodes.pop_back();

            HFMNode* left = HFMNodes.front();
            pop_heap(HFMNodes.begin(), HFMNodes.end(), compare);
            HFMNodes.pop_back();

            HFMNode* parent = new HFMNode(left->weight + right->weight);
            parent->left = left;
            parent->right = right;

            HFMNodes.push_back(parent);
            push_heap(HFMNodes.begin(), HFMNodes.end(), compare);
        }

        if (!HFMNodes.empty()) {
            root = HFMNodes.front();
        }

        return root;
    }
    //建立哈夫曼編碼樹,返回根指針
    HFMNode* BuildCodeTree() {
        //EncodingResults.resize(std::numeric_limits<char>().max());
        string code;//默認值為null,與string code="" 的區別見https://blog.csdn.net/yuanliang861/article/details/82893539
        //或者說，有點像vector里的reserve與resize方法；string code是預留空間，但不創建真正的對象；后者是創建真正的""的對象
        BuildCode(root, code);
        return root;
    };
    //得到整個字符集的哈夫曼編碼
    void BuildCode(HFMNode* pNode, string& code) {
        if (pNode->left == NULL) {
            EncodingResults[pNode->key] = code;
            return;
        }

        code.push_back('0');
        BuildCode(pNode->left, code);
        code.pop_back();//去掉左邊的0編碼，走向右邊
        code.push_back('1');
        BuildCode(pNode->right, code);
        code.pop_back();
    }
    //對待編碼文件中的字符進行編碼，輸出到codeflie中
    void GetEncoding() {
        if (_access(tobeEncoding.c_str(), 00)) {
            cerr << tobeEncoding <<"該文件不存在!\n";
            exit(2);
        }
        ifstream fin(tobeEncoding);
        if (!fin.is_open()) {
            cerr << "文件" << tobeEncoding << "無法打開!\n";
            exit(1);
        }
        istreambuf_iterator<char> beg(fin), end;
        string strdata(beg, end);
        fin.close();
        ofstream fout(EncodingResult, ios::out | ios::trunc);
        if (!fout.is_open()) {
            cerr << "文件" << EncodingResult << "打開失敗!\n";
            exit(1);
        }

        fout << this->GetCode(strdata);
    }
    //對編碼文件codefile的編碼進行譯碼，輸出到TextFile中
    void GetText() {
        if (_access(EncodingResult.c_str(), 00)) {
            cerr << "該文件不存在!\n";
            exit(2);
        }
        ifstream fin(EncodingResult);
        if (!fin.is_open()) {
            cerr << "該文件無法打開!\n";
            exit(1);
        }
        istreambuf_iterator<char> beg(fin), end;
        string strdata(beg, end);
        string result = this->Decode(strdata);
        fin.close();
        ofstream fout(DecodingResult, ios::out | ios::trunc);
        if (!fout.is_open()) {
            cerr << "該文件無法打開!\n";
            exit(1);
        }
        fout << result << endl;
    }
    //清空編碼樹，釋放內存
    void ClearDecodeTree() {
        ClearDecodeTree(root);
        root = nullptr;
    }
    //將哈夫曼編碼樹寫入文件
    void writeBTree() {
        ofstream fout(hfmTree, ios::out | ios::trunc);
        if (!fout.is_open()) {
            cerr << "打開文件失敗，將退出！\n";
            exit(1);
        }
        fout << count << endl;
        writeNode(root, 1); //寫入節點
        fout.close();
    }
    //從哈夫曼編碼樹文件hfmTree中讀取樹並恢復到內存
    HFMTree* readBTree() {
        HFMTree* hfmtp = new HFMTree;
        NodeMap mapNode;
        HFMNode* nodep;
        string anode;//按行讀取一條記錄
        ifstream fin(hfmTree, ios::in);
        if (fin.is_open()) {
            fin >> hfmtp->count;//首先讀取字符集的大小
            //接下來為count行字符的key與權重與位置
            stringstream input;
            vector<string> res;//存儲分割的字符串
            string tmp;
            char tmpkey; double tmpweight; int tmpposition;
            getline(fin, tmp);
            int i = -1;
            while (getline(fin, anode)) {//getline丟掉了換行，不需再考慮
                input << anode;
                while (input >> anode)//按空格分割，分別得到char 型的key, int 型的weight, int型的 position
                {
                    res.push_back(anode);
                }
                input.clear(ios::goodbit);
                tmpkey = res[++i][0];//res[0]得到key的string，再用res[0][0]得到第一個字符即key
                tmpweight = stringToNum<weighttype>(res[++i]);
                tmpposition = stringToNum<int>(res[++i]);
                nodep = new HFMNode(tmpkey, tmpweight);
                mapNode.insert(NodeMap::value_type(tmpposition, nodep));
            }
            NodeMap::iterator iter;
            NodeMap::iterator iter_t;
            for (iter = mapNode.begin(); iter != mapNode.end(); iter++) {
                iter_t = mapNode.find(2 * iter->first);
                if (iter_t != mapNode.end()) { //找到左兒子
                    iter->second->left = iter_t->second;
                }
                else {	//未找到左兒子
                    iter->second->left = NULL;
                }
                iter_t = mapNode.find(2 * iter->first + 1);
                if (iter_t != mapNode.end()) { //找到右兒子
                    iter->second->right = iter_t->second;
                }
                else {	//未找到右兒子
                    iter->second->right = NULL;
                }
            }
            iter_t = mapNode.find(1); //找root節點
            if (iter_t != mapNode.end()) {
                hfmtp->root = iter_t->second;
            }
            fin.close();
        }
        return hfmtp;
    }

    void printBTreeToScreen(HFMTree* hfmt) {
        printSubBTreeToScreen(hfmt->root, 0);
    }
    void printBTreeToFile(HFMTree* hfmt,  string filename=TreePrin) {
        ofstream fout(filename, ios::out | ios::trunc);
        if (!fout.is_open()) {
            cerr << "打開文件失敗，將退出！\n";
            exit(1);
        };
        printSubBTreeToFile(hfmt->root, 0,fout);
        fout.close();
    }
    HFMNode* root;
private:
    int count;
    map<char, string>EncodingResults;
    //vector<string> EncodingResults;
    //寫入單個哈夫曼樹節點
    void writeNode(const HFMNodeP hfm_nodep, Position p) {
        if (!hfm_nodep) {
            return;
        }
        ofstream fout(hfmTree, ios::out | ios::app);
        if (!fout.is_open()) {
            cerr << "打開文件失敗，將退出！\n";
            exit(1);
        }
        HFMNodeFile node;
        node.key = hfm_nodep->key;
        node.weight = hfm_nodep->weight;
        node.p = p;
        //寫入當前節點,按行寫入字符，權重，在哈夫曼樹中的位置
        fout << node.key << " " << node.weight << " " << node.p << endl;
        //寫入左子樹
        writeNode(hfm_nodep->left, 2 * p);
        //寫入右子樹
        writeNode(hfm_nodep->right, 2 * p + 1);
    }
    //帶縮進地打印一個哈夫曼樹節點,每層縮進量增加2個空格
    void printSubBTreeToScreen(HFMNodeP hfmnp, int indentation) {
        int i;
        if (!hfmnp)
            return;
        for (i = 0; i < indentation; i++)
            cout << " ";
        cout << hfmnp->key << " with " << hfmnp->weight << endl;
        printSubBTreeToScreen(hfmnp->left, indentation + 2);
        printSubBTreeToScreen(hfmnp->right, indentation + 2);
    }
    void printSubBTreeToFile(HFMNodeP hfmnp, int indentation,ofstream&fout) {
        int i;
        if (!hfmnp)
            return;
        for (i = 0; i < indentation; i++)
            fout << " ";
        fout << hfmnp->key << " with " << hfmnp->weight << endl;
        printSubBTreeToFile(hfmnp->left, indentation + 2,fout);
        printSubBTreeToFile(hfmnp->right, indentation + 2,fout);
    }
    //遞歸到葉子節點后再刪除葉子節點
    void ClearDecodeTree(HFMNode* pNode) {
        if (pNode == nullptr) return;

        ClearDecodeTree(pNode->left);
        ClearDecodeTree(pNode->right);
        delete pNode;
    }
    //根據文本內容，遍歷每一個字，連接每個字的編碼得到文本的編碼
    string GetCode(const string& Text) {
        string TextEncodingResult;
        for (int i = 0; i < Text.size(); ++i) {
            TextEncodingResult += EncodingResults[Text[i]];
        }

        return TextEncodingResult;
    }
    //根據一段文本的哈夫曼編碼，得到對應的文本
    string Decode(const string& TextEncodingResult) {
        string Text;

        HFMNode* pNode = root;
        for (int i = 0; i < TextEncodingResult.size(); ++i) {
            if (TextEncodingResult[i] == '0') {
                pNode = pNode->left;
            }
            else {
                pNode = pNode->right;
            }

            if (pNode->left == NULL) {
                //哈夫曼樹中只有度為0和度為2的節點
                //因此，只需判斷左子樹或右子樹為空，就可以確定是葉子節點。
                //這時，把對應的字符壓進文本，同時，從頭開始遍歷編碼樹
                Text.push_back(pNode->key);
                pNode = root;
            }
        }

        return Text;
    }
};
using HFMTreeP = HFMTree*;
// Defined below main.
HFMTree* Initialization(int n);
int main()
{
    int flag = 0;
    int choice;
    HFMTree* hfmtp = nullptr;
    cout << "\n\n\t\t\t|**********************************************|\n";
    cout << "\t\t\t ______________________________________________|\n";
    cout << "\t\t\t| 哈夫曼編碼/譯碼系統 |\n";
    cout << "\t\t\t| |\n";
    cout << "\t\t\t| 1.初始化 2.編碼 |\n";
    cout << "\t\t\t| |\n";
    cout << "\t\t\t| 3.譯碼 4.印代碼文件 |\n";
    cout << "\t\t\t| |\n";
    cout << "\t\t\t| 5.印哈夫曼樹 |\n\n";
    cout << "\t\t\t| 0.退出 |\n";
    cout << "\t\t\t|********************S**************************|\n";
    while (cout << "\n\t\t\t請選擇功能（輸入0-5任意一個數字）:\n" && cin >> choice)
    {
        switch (choice)
        {
        case 1: {
            int n;
            cout << "請輸入字符集大小:\n";
            cin >> n;
            hfmtp = Initialization(n);
            flag = 1;
            cout << "初始化完成\n";
            break;
        }
        case 2: {
            if (flag == 1) {
                //內存中有哈夫曼樹
                hfmtp->GetEncoding();
            }
            else {
                //內存中沒有哈夫曼樹，需要先從文件讀取，並恢復哈夫曼樹再進行操作
                hfmtp = hfmtp->readBTree();
                flag = 1;//經過恢復后，內存中已有哈夫曼樹
                hfmtp->BuildCodeTree();
                hfmtp->GetEncoding();
            }
            cout << "編碼完成\n";
            break;
        }
        case 3: {
            if (flag == 1) {
                hfmtp->GetText();
            }
            else {
                hfmtp = hfmtp->readBTree();
                flag = 1;//經過恢復后，內存中已有哈夫曼樹
                hfmtp->GetText();
            }
            cout << "譯碼完成\n";
            break;
        }
        case 4: {
            ifstream fin(EncodingResult);
            istreambuf_iterator<char> beg(fin), end;
            string strdata(beg, end);
            fin.close();
            ofstream fout(CodePrin, ios::out | ios::trunc);
            if (!fout.is_open()) {
                cerr << "打開文件失敗，將退出！\n";
                exit(1);
            }
            fout << strdata;
            for (int i = 1; i <= strdata.size(); ++i) {
                if (i % 50 == 0) cout << "\n";
                cout << strdata[i - 1];
            }
            break;
        }
        case 5: {
            if (flag == 0) {
                //內存中沒有哈夫曼樹，需要先從文件讀取，並恢復哈夫曼樹再進行操作
                hfmtp = hfmtp->readBTree();
                flag = 1;//經過恢復后，內存中已有哈夫曼樹
            }
            ofstream fout(TreePrin, ios::out | ios::trunc);
            if (!fout.is_open()) {
                cerr << "打開文件失敗，將退出！\n";
                exit(1);
            }
            hfmtp->printBTreeToScreen(hfmtp);
            hfmtp->printBTreeToFile(hfmtp);
            break;
        }
        case 0:{
            cout << "正在退出...\n";
            Sleep(1000);
            exit(0);
        }
        default:
            cout << "請輸入0~5之間的輸入!\n";
            break;
        }
    }
    return 0;
}
// Reads up to `n` (symbol, weight) pairs from standard input, builds the Huffman tree and its
// code table, writes the tree to the tree file and returns the new tree (owned by the caller).
// Reading stops early on malformed input. A symbol entered twice keeps its first weight.
HFMTree* Initialization(int n) {

    map<char, double>tmp;
    cout << "依次輸入字符及其權重\n";
    for (int i = 0; i < n; ++i) {
        char key;
        double weight;
        if (!(cin >> key >> weight)) {
            cerr << "輸入格式錯誤，已停止讀取!\n";
            // Recover the stream and drop the rest of the line so later input still works.
            cin.clear();
            string rest;
            getline(cin, rest);
            break;
        }
        if (!tmp.insert(pair<char,double>(key, weight)).second) {
            cerr << "字符" << key << "重複，已忽略!\n";
        }
    }
    HFMTree* hfmt = new HFMTree;
    hfmt->BuildHFMTree(tmp);
    // Build the code table right away so the tree can encode without a further step.
    if (hfmt->root != nullptr) {
        hfmt->BuildCodeTree();
    }

    // writeBTree() opens (and truncates) the tree file itself and exits if that fails.
    hfmt->writeBTree();
    return hfmt;
}
//我認為判斷內存中是否存在哈夫曼樹的方式，是看是否進行了Initialization或者readBTree，因此，我在該函數中設置了flag變量

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 哈夫曼編碼譯碼系統（c/c++） C++哈夫曼樹編碼和譯碼的實現數據結構圖文解析之：哈夫曼樹與哈夫曼編碼詳解及C++模板實現哈夫曼樹與哈夫曼編碼的實現哈夫曼編解碼壓縮解壓文件—C++實現哈弗曼樹與哈夫曼編碼哈夫曼編碼問題哈夫曼樹及編碼哈夫曼編碼哈夫曼編碼