統計單詞個數及詞頻(C++實現)


#include<algorithm>
#include<fstream>
#include<iostream>
#include<map>
#include<string>

using namespace std;

// One node of the singly linked word list.
struct Word
{
    string word;    // text of the word
    size_t length;  // length value supplied by whoever creates the node
    Word* next;     // following node, or NULL when this is the last one
    int repnum;     // "repeat number": occurrence count recorded on this node
    bool ifdel;     // "if deleted": true once the node has been flagged as removed

    // Builds a node from its text; every other field has a default:
    // length 0, no successor, a count of 1, and not deleted.
    Word(string text,
         size_t textLength = 0,
         Word* following = NULL,
         int repeats = 1,
         bool deleted = false)
        : word(text),
          length(textLength),
          next(following),
          repnum(repeats),
          ifdel(deleted)
    {
    }
};

// Global state of the singly linked word list.
Word* head = NULL;  // first node; NULL while the list is empty
Word* tail = NULL;  // last node; NULL while the list is empty
int size = 0;       // list length, i.e. total number of words (duplicates included)
int delsum = 0;     // "delete sum": how many nodes have been flagged as deleted

void Push(const string& str, const size_t& len)   //形成鏈表
{
    if (NULL == head)
    {
        head = tail = new Word(str, len, NULL, 1, false);
    }
    else
    {
        tail->next = new Word(str, len, NULL, 1, false);
        tail = tail->next;
    }
    size++;
}

void Destory()   //delete new
{
    Word* ptr = head;
    while (ptr)
    {
        Word* pt = ptr;
        ptr = ptr->next;
        delete pt;
    }
    head = tail = NULL;
    size = 0;
}

// Splits `mystr` into words and appends each one to the global list via Push.
//
// A word is a maximal run of ASCII letters (a-z, A-Z); every other character
// acts as a separator. Letter case is kept as-is, so "The" and "the" are
// pushed as different words.
//
// mystr: the text to scan; it is only read, never modified.
void Readin(string& mystr)
{
    string current;  // letters of the word being collected
    for (size_t i = 0; i < mystr.length(); i++)
    {
        const char ch = mystr[i];
        const bool isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        if (isLetter)
        {
            current += ch;
            continue;
        }
        // A separator ends the current word, if one was in progress.
        if (!current.empty())
        {
            Push(current, current.length());
            current.clear();
        }
    }
    // A word that runs up to the very end of the text has no separator after
    // it; without this flush it would be dropped.
    if (!current.empty())
    {
        Push(current, current.length());
    }
}
void DeSame()      //delete the same 刪除相同的單詞(不是真刪,只是做標記)
{
    Word* p = head;
    while (p&&p->next)
    {
        while (p->ifdel&&p->next)
        {
            p = p->next;
        }
        Word* pt = p->next;
        while (pt)
        {
            if (!pt->ifdel&&pt->word == p->word)
            {
                p->repnum++;
                pt->ifdel = true;
                delsum++;
            }
            pt = pt->next;
        }
        p = p->next;
    }
}

// "Input pointers": copies the address of every node that is not flagged as
// deleted into `warr`, in list order, starting at index 0.
//
// warr: destination array. No bounds check is performed, so the caller must
//       provide room for every unflagged node in the list.
void Inputp(Word* warr[])
{
    int filled = 0;  // next free slot in warr
    for (Word* node = head; node; node = node->next)
    {
        if (node->ifdel)
        {
            continue;
        }
        warr[filled] = node;
        ++filled;
    }
}

void Sort(Word** warr, int start, int end)   //將指針按其指向的結點的repnum從大到小排序,快排實現
{
    int i = 0, j = 0;
    Word* key = NULL;
    key = warr[start];
    i = start;
    j = end;
    while (i<j)
    {
        while (warr[j]->repnum <= key->repnum&&i<j)j--;
        warr[i] = warr[j];
        while (warr[i]->repnum >= key->repnum&&i<j)i++;
        warr[j] = warr[i];
    }
    warr[i] = key;
    if (i - 1>start)Sort(warr, start, i - 1);
    if (end > i + 1)Sort(warr, i + 1, end);
}

// Prints the first `len` entries of `warr`, one per line, as the word, two
// spaces, and its repnum.
//
// warr: array of node pointers to print.
// len:  number of entries to print.
void Show(Word** warr, int len)
{
    int index = 0;
    while (index < len)
    {
        const Word* entry = warr[index];
        cout << entry->word << "  " << entry->repnum << endl;
        ++index;
    }
}

int main()
{
    ifstream readfile("zpc.txt", ios::in);
    if (!readfile){ cout << "程序出現異常,自動退出!" << endl; return 0; }
    string str, str1;
    while (!readfile.eof())
    {
        getline(readfile, str1);
        str += str1;
        str += ' ';
    }
    readfile.close();
    Readin(str);
    DeSame();
    cout << "單詞總個數(不考慮重復):" << size << endl;
    cout << "除去重復后的單詞個數(即重復的單詞按1個計):" << size - delsum << endl;
    Word** wdarr = new Word*[size - delsum];
    Inputp(wdarr);
    Sort(wdarr, 0, size - delsum - 1);
    Show(wdarr, size - delsum);
    delete[]wdarr;
    Destory();
    return 0;
}

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM