壓縮算法實現之LZ78


LZ78編碼

LZ78算法是一種基於詞典的壓縮算法,在編碼過程中動態建立詞典。

LZ78的編碼思想:

不斷地從字符流中提取新的綴-符串(String),通俗地理解為新"詞條",然後用"代號"也就是碼字(Code word)表示這個"詞條"。

對字符流的編碼就變成了用碼字(Code word)去替換字符流(Charstream),生成碼字流(Codestream),從而達到壓縮數據的目的。

幾個約定:

  1. 字符流(Charstream):要被編碼的數據序列。
  2. 字符(Character):字符流中的基本數據單元。
  3. 前綴(Prefix): 在一個字符之前的字符序列。
  4. 綴-符串(String):前綴+字符。
  5. 碼字(Code word):編碼以後在碼字流中的基本數據單元,代表詞典中的一串字符
  6. 碼字流(Codestream): 碼字和字符組成的序列,是編碼器的輸出
  7. 詞典(Dictionary): 綴-符串表。按照詞典中的索引號對每條綴-符串(String)指定一個碼字(Code word)
  8. 當前前綴(Current prefix):在編碼算法中使用,指當前正在處理的前綴,用符號P表示
  9. 當前字符(Current character):在編碼算法中使用,指當前前綴之後的字符,用符號Char表示。
  10. 當前碼字(Current code word): 在譯碼算法中使用,指當前處理的碼字,用W表示當前碼字,String.W表示當前碼字的綴-符串。

編碼算法步驟:

步驟1: 在開始時,詞典和當前前綴P 都是空的。

步驟2: 當前字符Char :=字符流中的下一個字符。

步驟3: 判斷P+Char是否在詞典中:

(1) 如果"是":用Char擴展P,讓P := P+Char ;

(2) 如果"否":① 輸出與當前前綴P相對應的碼字和當前字符Char;

② 把字符串P+Char 添加到詞典中。③ 令P :=空值。

(3) 判斷字符流中是否還有字符需要編碼

① 如果"是":返回到步驟2。

② 如果"否":若當前前綴P不空,輸出相應於當前前綴P的碼字,結束編碼。

解碼算法步驟:

步驟1:在開始時詞典為空;

步驟2:當前碼字W:= 碼字流中的下一個碼字

步驟3:當前字符Char:=緊隨碼字之後的字符

步驟4:把當前碼字的綴-符串(String.W)輸出到字符流,然後輸出字符Char

步驟5:把String.W + Char添加到詞典中

步驟6:判斷碼字流中是否還有碼字要譯碼,

(1)如果有,返回步驟2 (2)如果沒有,則結束

代碼實現(C#):

  /// <summary>
  /// A single LZ78 dictionary entry: a numeric code paired with the
  /// phrase (prefix + character) that the code stands for.
  /// </summary>
  public struct Dictionary
  {
      /// <summary>Code (index number) assigned to this entry.</summary>
      public int id;

      /// <summary>The phrase stored under <see cref="id"/>.</summary>
      public string context;

      /// <summary>
      /// Creates an entry from a code and the phrase it represents.
      /// </summary>
      /// <param name="id">Code to store in <see cref="id"/>.</param>
      /// <param name="str">Phrase to store in <see cref="context"/>.</param>
      public Dictionary(int id, string str)
      {
          context = str;
          this.id = id;
      }
  }

 

  /// <summary>
  /// LZ78 encoder. Reads a string and writes its code stream to the console
  /// as a sequence of <c>(code,char)</c> pairs, where <c>code</c> is the
  /// dictionary number of the longest already-known prefix (0 for "none")
  /// and <c>char</c> is the character that follows it.
  /// </summary>
  /// <remarks>
  /// If the input ends in the middle of a known phrase, the last pair is
  /// written as <c>(code,/)</c>. Because <c>/</c> is also a legal input
  /// character, a trailing <c>(code,/)</c> is inherently ambiguous in this
  /// text format; the format is kept unchanged for compatibility.
  /// </remarks>
  public static class Encoder
  {
      /// <summary>
      /// Dictionary phrases in insertion order. The phrase at index i has
      /// code i + 1 (code 0 is reserved for the empty prefix).
      /// </summary>
      private static readonly List<string> Phrases = new List<string>();

      /// <summary>
      /// Phrase-to-code lookup, kept in sync with <see cref="Phrases"/>.
      /// Fully qualified because this file also declares a non-generic
      /// type named Dictionary.
      /// </summary>
      private static readonly System.Collections.Generic.Dictionary<string, int> CodeByPhrase =
          new System.Collections.Generic.Dictionary<string, int>();

      /// <summary>
      /// Appends a phrase to the dictionary and assigns it the next code.
      /// </summary>
      /// <param name="item">Phrase to add; must not already be present.</param>
      private static void AddToDic(string item)
      {
          Phrases.Add(item);
          CodeByPhrase.Add(item, Phrases.Count);
      }

      /// <summary>
      /// Prints the dictionary built by the most recent <see cref="Execute"/>
      /// call, one <c>&lt;code,phrase&gt;</c> line per entry.
      /// </summary>
      public static void ShowDictionary()
      {
          Console.WriteLine("Dictionary:");
          for (int i = 0; i < Phrases.Count; i++)
          {
              Console.WriteLine("<{0},{1}>", i + 1, Phrases[i]);
          }
      }

      /// <summary>
      /// Runs LZ78 encoding on <paramref name="str"/> and writes the code
      /// stream, followed by a newline, to the console.
      /// </summary>
      /// <param name="str">Text to encode.</param>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="str"/> is null.
      /// </exception>
      public static void Execute(string str)
      {
          if (str == null)
          {
              throw new ArgumentNullException("str");
          }

          // Every encoding starts from an empty dictionary; without this reset
          // a second call would reuse phrases from the previous input and emit
          // codes that a decoder starting from scratch cannot resolve.
          Phrases.Clear();
          CodeByPhrase.Clear();

          StringBuilder prefix = new StringBuilder();
          int prefixCode = 0; // code of the current prefix; 0 while it is empty

          foreach (char c in str)
          {
              prefix.Append(c);
              string candidate = prefix.ToString();

              int code;
              if (CodeByPhrase.TryGetValue(candidate, out code))
              {
                  // prefix + c is already known: keep extending the prefix.
                  prefixCode = code;
              }
              else
              {
                  // New phrase: emit (known prefix, c), remember it, start over.
                  Console.Write("({0},{1})", prefixCode, c);
                  AddToDic(candidate);
                  prefix.Clear();
                  prefixCode = 0;
              }
          }

          // Input ended inside a known phrase: emit its code with the end marker.
          if (prefix.Length > 0)
          {
              Console.Write("({0},{1})", prefixCode, "/");
          }
          Console.WriteLine();
      }
  }

 

  /// <summary>
  /// LZ78 decoder. Parses a textual code stream made of <c>(code,char)</c>
  /// pairs and writes the reconstructed text to the console.
  /// </summary>
  /// <remarks>
  /// A final pair of the form <c>(code,/)</c> with a non-zero code is treated
  /// as the end marker written when the input ended inside a known phrase:
  /// only the phrase is written, no extra character. Any other <c>/</c> is
  /// decoded as a literal slash. A text whose last pair genuinely ends in
  /// <c>/</c> with a non-zero code cannot be told apart from the marker in
  /// this format.
  /// </remarks>
  public static class Decoder
  {
      /// <summary>
      /// One parsed pair of the code stream: dictionary code plus character.
      /// </summary>
      struct Codes
      {
          public int id;
          public char code;
          public Codes(int id, char code)
          {
              this.id = id;
              this.code = code;
          }
      }

      /// <summary>
      /// Dictionary phrases in insertion order. The phrase with code k is
      /// stored at index k - 1 (code 0 means "empty prefix").
      /// </summary>
      private static readonly List<string> Phrases = new List<string>();

      /// <summary>
      /// Parses the textual code stream into a list of pairs.
      /// </summary>
      /// <param name="str">Text such as <c>(0,a)(1,b)</c>.</param>
      /// <returns>The pairs in the order they appear.</returns>
      /// <exception cref="FormatException">
      /// A code between '(' and ',' is not a valid integer.
      /// </exception>
      static List<Codes> BuildCodes(string str)
      {
          // Parser states:
          //   Outside     - between pairs, waiting for '('
          //   ReadingId   - collecting the code digits up to ','
          //   ReadingChar - the very next character is the literal, whatever it is
          //   Closing     - literal read, waiting for ')'
          const int Outside = 0, ReadingId = 1, ReadingChar = 2, Closing = 3;

          List<Codes> codes = new List<Codes>();
          int status = Outside;
          int id = 0;
          char code = (char)0;
          string number = "";

          foreach (char c in str)
          {
              // Dispatch on the state first so that '(', ',' and ')' appearing
              // in the character slot are read as data, not as delimiters.
              switch (status)
              {
                  case ReadingId:
                      if (c == ',')
                      {
                          id = Convert.ToInt32(number);
                          number = "";
                          status = ReadingChar;
                      }
                      else
                      {
                          number += c;
                      }
                      break;

                  case ReadingChar:
                      code = c;
                      status = Closing;
                      break;

                  case Closing:
                      if (c == ')')
                      {
                          codes.Add(new Codes(id, code));
                          status = Outside;
                      }
                      break;

                  default:
                      if (c == '(')
                      {
                          status = ReadingId;
                      }
                      break;
              }
          }

          return codes;
      }

      /// <summary>
      /// Looks up the phrase stored under a dictionary code.
      /// </summary>
      /// <param name="id">Dictionary code; 0 denotes the empty prefix.</param>
      /// <returns>
      /// The phrase, or an empty string when the code is 0 or not present.
      /// </returns>
      static string GetContext(int id)
      {
          if (id >= 1 && id <= Phrases.Count)
          {
              return Phrases[id - 1];
          }
          return string.Empty;
      }

      /// <summary>
      /// Runs LZ78 decoding on the code stream in <paramref name="str"/> and
      /// writes the decoded text, followed by a newline, to the console.
      /// </summary>
      /// <param name="str">Code stream text, e.g. <c>(0,a)(1,/)</c>.</param>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="str"/> is null.
      /// </exception>
      /// <exception cref="FormatException">
      /// A code in the stream is not a valid integer.
      /// </exception>
      public static void Execute(string str)
      {
          if (str == null)
          {
              throw new ArgumentNullException("str");
          }

          // Start from an empty dictionary on every call; otherwise a second
          // call would decode against phrases left over from the first one.
          Phrases.Clear();

          List<Codes> codes = BuildCodes(str);
          for (int i = 0; i < codes.Count; i++)
          {
              Codes current = codes[i];
              string prefix = GetContext(current.id);

              // End marker: last pair, non-zero code, '/' in the character slot.
              bool isEndMarker = current.code == '/'
                                 && current.id != 0
                                 && i == codes.Count - 1;
              if (isEndMarker)
              {
                  Console.Write(prefix);
                  break;
              }

              string phrase = prefix + current.code;
              Console.Write(phrase);
              Phrases.Add(phrase);
          }
          Console.WriteLine();
      }
  }

執行效果(主界面程序代碼省略):

可見算法執行的結果是完全正確的。

源碼下載:http://files.cnblogs.com/ryuasuka/LZ78.rar


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM