哈夫曼編碼—數據壓縮與解壓(Java)
博客說明
文章所涉及的資料來自互聯網整理和個人總結,意在於個人學習和經驗匯總,如有什麼地方侵權,請聯繫本人刪除,謝謝!
介紹
- 赫夫曼編碼也翻譯為 哈夫曼編碼(Huffman Coding),又稱霍夫曼編碼,是一種編碼方式, 屬於一種程序算法
- 赫夫曼編碼是赫夫曼樹在電訊通信中的經典應用之一。
- 赫夫曼編碼廣泛地用於數據文件壓縮。其壓縮率通常在20%~90%之間
- 赫夫曼碼是可變字長編碼(VLC)的一種。Huffman於1952年提出一種編碼方法,稱之為最佳編碼
通信領域中信息的處理方式
定長編碼
數據傳輸太長
i like like like java do you like a java // 共40個字符(包括空格)
105 32 108 105 107 101 32 108 105 107 101 32 108 105 107 101 32 106 97 118 97 32 100 111 32 121 111 117 32 108 105 107 101 32 97 32 106 97 118 97 //對應Ascii碼
01101001 00100000 01101100 01101001 01101011 01100101 00100000 01101100 01101001 01101011 01100101 00100000 01101100 01101001 01101011 01100101 00100000 01101010 01100001 01110110 01100001 00100000 01100100 01101111 00100000 01111001 01101111 01110101 00100000 01101100 01101001 01101011 01100101 00100000 01100001 00100000 01101010 01100001 01110110 01100001 //對應的二進制
變長編碼
存在多義性
i like like like java do you like a java // 共40個字符(包括空格)
d:1 y:1 u:1 j:2 v:2 o:2 l:4 k:4 e:4 i:5 a:5 :9 // 各個字符對應的個數
0= , 1=a, 10=i, 11=e, 100=k, 101=l, 110=o, 111=v, 1000=j, 1001=u, 1010=y, 1011=d
說明:按照各個字符出現的次數進行編碼,原則是出現次數越多的,則編碼越小,比如 空格出現了9 次, 編碼為0 ,其它依次類推.
按照上面給各個字符規定的編碼,則我們在傳輸 "i like like like java do you like a java" 數據時,編碼就是 10010110100...
哈夫曼編碼(前綴編碼)
i like like like java do you like a java // 共40個字符(包括空格)
d:1 y:1 u:1 j:2 v:2 o:2 l:4 k:4 e:4 i:5 a:5 :9 // 各個字符對應的個數
按照上面字符出現的次數構建一顆赫夫曼樹, 次數作為權值
//根據赫夫曼樹,給各個字符
//規定編碼 , 向左的路徑為0
//向右的路徑為1 , 編碼如下:
o: 1000 u: 10010 d: 100110 y: 100111 i: 101
a : 110 k: 1110 e: 1111 j: 0000 v: 0001
l: 001 : 01
按照上面的赫夫曼編碼,我們的"i like like like java do you like a java" 字符串對應的編碼為 (注意這裡我們使用的是無損壓縮)
1010100110111101111010011011110111101001101111011110100001100001110011001111000011001111000100100100110111101111011100100001100001110
長度為 : 133
說明:
原來長度是 359 (即上面定長編碼的二進制串:40 個字節共 320 位,再加上 39 個分隔空格), 壓縮了 (359-133) / 359 = 62.9%;若只計算 320 個二進制位,則壓縮了 (320-133) / 320 = 58.4%
此編碼滿足前綴編碼, 即字符的編碼都不能是其他字符編碼的前綴。不會造成匹配的多義性
注意
這個哈夫曼樹根據排序方法不同,也可能不太一樣,這樣對應的赫夫曼編碼也不完全一樣,但是wpl 是一樣的,都是最小的
壓縮思路
- 首先將字符串轉化為字節數組
- 創建哈夫曼樹,將值和權重寫入
- 根據葉子結點的權重來計算哈夫曼編碼表
- 根據哈夫曼編碼表來計算哈夫曼編碼
- 最后再轉化為字節數組
代碼
package cn.guizimo.huffmancode;
import java.util.*;
/**
* @author guizimo
* @date 2020/8/8 11:55 上午
*/
public class HuffmanCode {

    /**
     * Code table filled by {@code getCodes(Node)}: maps each input byte to its
     * Huffman code written as a string of '0' and '1' characters.
     * It is cleared at the start of every {@code getCodes(Node)} call.
     */
    static Map<Byte, String> huffmanCodes = new HashMap<Byte, String>();

    /** Empty path prefix handed to the recursive traversal; it is copied, never mutated. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Compresses a sample sentence and prints the packed bytes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        byte[] contentBytes = content.getBytes();
        // Huffman-encode the sample text
        byte[] zip = huffmanZip(contentBytes);
        System.out.println("哈夫曼編碼:" + Arrays.toString(zip));
    }

    /**
     * Runs the whole compression pipeline: count bytes, build the tree,
     * derive the code table, then pack the encoded bits into bytes.
     *
     * @param bytes raw input; may be empty
     * @return the packed Huffman bit stream (empty array for empty input)
     */
    private static byte[] huffmanZip(byte[] bytes) {
        List<Node> nodes = getNodes(bytes);
        // Huffman tree
        Node huffmanTree = createHuffmanTree(nodes);
        // Huffman code table
        Map<Byte, String> codes = getCodes(huffmanTree);
        // packed Huffman bit stream
        return zip(bytes, codes);
    }

    /**
     * Encodes every input byte with its code and packs the resulting bit
     * string into bytes, eight bits at a time. The final byte holds the
     * remaining bits (fewer than eight when the total is not a multiple of 8).
     *
     * @param bytes        raw input
     * @param huffmanCodes code table covering every byte value in {@code bytes}
     * @return the packed bytes
     * @throws IllegalArgumentException if a byte has no entry in the code table
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                // Appending null would silently add the text "null" to the bit string.
                throw new IllegalArgumentException("no Huffman code for byte " + b);
            }
            bits.append(code);
        }
        int total = bits.length();
        byte[] packed = new byte[(total + 7) / 8];
        int index = 0;
        for (int i = 0; i < total; i += 8) {
            int end = Math.min(i + 8, total);
            // parseInt yields 0..255; the cast keeps the low eight bits
            packed[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return packed;
    }

    /**
     * Builds the code table for the given tree into {@link #huffmanCodes}.
     * A tree consisting of a single leaf (input with one distinct byte) gets
     * the one-bit code "0" so that the input can still be encoded.
     *
     * @param root root of the Huffman tree, or {@code null} for empty input
     * @return the shared code table (empty when {@code root} is {@code null})
     */
    private static Map<Byte, String> getCodes(Node root) {
        // Drop codes left over from a previous run.
        huffmanCodes.clear();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // The root itself is a leaf: there is no path, so assign a fixed code.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        getCodes(root.left, "0", stringBuilder);
        getCodes(root.right, "1", stringBuilder);
        return huffmanCodes;
    }

    /**
     * Recursive step of the code-table construction.
     *
     * @param node          node being visited; {@code null} is ignored
     * @param code          "0" when {@code node} is a left child, "1" when a right child
     * @param stringBuilder path from the root to the parent of {@code node}
     */
    private static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // internal node: keep descending
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // leaf: the accumulated path is the code
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /** Prints the tree in pre-order, or a notice when the tree is empty. */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("哈夫曼樹為空");
        }
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest nodes.
     * The list is consumed in the process.
     *
     * @param nodes leaf nodes, one per distinct byte
     * @return the root, or {@code null} when {@code nodes} is empty
     */
    private static Node createHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            // After sorting, the two lightest nodes sit at the front.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Counts how often each byte value occurs and wraps every (byte, count)
     * pair in a leaf node.
     *
     * @param bytes raw input
     * @return one leaf per distinct byte value, in the map's iteration order
     */
    private static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }
        List<Node> nodes = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }
}
/**
 * Node of a Huffman tree. Nodes order themselves by ascending weight so that
 * a sorted list starts with the lightest nodes.
 */
class Node implements Comparable<Node> {
    /** Byte value carried by this node; may be {@code null} (HuffmanCode passes null for merged parent nodes). */
    Byte data;
    /** Number of occurrences of the byte; for a merged parent, the sum of its children's weights. */
    int weight;
    /** Left child, or {@code null}. */
    Node left;
    /** Right child, or {@code null}. */
    Node right;

    /**
     * Creates a node without children.
     *
     * @param data   byte value, or {@code null}
     * @param weight weight of the node
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare with
     * @return negative, zero or positive when this weight is smaller than,
     *         equal to or greater than the other weight
     */
    @Override
    public int compareTo(Node o) {
        // Integer.compare avoids the overflow that "this.weight - o.weight" can hit.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}
解壓思路
- 將字節數組轉化為二進制
- 根據反轉的哈夫曼編碼表(編碼 → 字節)逐段匹配二進制串,生成ASCII集合
代碼
package cn.guizimo.huffmancode;
import java.util.*;
/**
* @author guizimo
* @date 2020/8/8 11:55 上午
*/
public class HuffmanCode {

    /**
     * Code table filled by {@code getCodes(Node)}: maps each input byte to its
     * Huffman code written as a string of '0' and '1' characters.
     * It is cleared at the start of every {@code getCodes(Node)} call.
     */
    static Map<Byte, String> huffmanCodes = new HashMap<Byte, String>();

    /** Empty path prefix handed to the recursive traversal; it is copied, never mutated. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Number of meaningful bits in the final byte produced by the most recent
     * {@code zip} call: 1..8, or 0 when the output was empty. Packing drops the
     * leading zeros of the final chunk, so {@code huffmanUnzip} needs this
     * count to restore them.
     */
    static int lastByteBitCount = 0;

    /**
     * Compresses a sample sentence, prints the packed bytes, then decompresses
     * them again and prints the recovered text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        byte[] contentBytes = content.getBytes();
        // Huffman compression
        byte[] zip = huffmanZip(contentBytes);
        System.out.println("哈夫曼壓縮:" + Arrays.toString(zip));
        // Huffman decompression
        byte[] unzip = huffmanUnzip(huffmanCodes, zip);
        System.out.println("哈夫曼解壓:" + new String(unzip));
    }

    /**
     * Decodes bytes produced by the most recent {@code zip} call.
     *
     * @param huffmanCodes code table used for compression (byte -> bit string)
     * @param huffmanBytes packed bit stream returned by {@code zip}
     * @return the original bytes
     * @throws IllegalStateException    if no valid final-byte bit count was recorded
     * @throws IllegalArgumentException if the bit stream ends in the middle of a code
     */
    private static byte[] huffmanUnzip(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
        // Step 1: expand the packed bytes back into a string of bits.
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < huffmanBytes.length; i++) {
            String eightBits = byteToBitString(true, huffmanBytes[i]);
            if (i == huffmanBytes.length - 1) {
                if (lastByteBitCount < 1 || lastByteBitCount > 8) {
                    throw new IllegalStateException(
                            "invalid bit count for the final byte: " + lastByteBitCount);
                }
                // Keep only the bits that zip actually stored in the final byte.
                bits.append(eightBits.substring(8 - lastByteBitCount));
            } else {
                bits.append(eightBits);
            }
        }
        // Step 2: invert the code table (bit string -> byte).
        Map<String, Byte> decodeTable = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            decodeTable.put(entry.getValue(), entry.getKey());
        }
        // Step 3: grow a window from each start position until it matches a code.
        List<Byte> decoded = new ArrayList<>();
        int start = 0;
        while (start < bits.length()) {
            int end = start + 1;
            Byte symbol = decodeTable.get(bits.substring(start, end));
            while (symbol == null) {
                if (end == bits.length()) {
                    throw new IllegalArgumentException(
                            "bit stream ends inside a code at bit " + start);
                }
                end++;
                symbol = decodeTable.get(bits.substring(start, end));
            }
            decoded.add(symbol);
            start = end;
        }
        byte[] result = new byte[decoded.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = decoded.get(i);
        }
        return result;
    }

    /**
     * Converts a byte to its binary text form, treating it as unsigned (0..255).
     *
     * @param flag {@code true} to left-pad with zeros to exactly eight
     *             characters, {@code false} for the shortest form
     * @param b    the byte to convert
     * @return the bit string
     */
    private static String byteToBitString(boolean flag, byte b) {
        // Mask first so a negative byte does not expand to a 32-bit string.
        int unsigned = b & 0xFF;
        if (!flag) {
            return Integer.toBinaryString(unsigned);
        }
        // Setting bit 8 forces a nine-character string; dropping it leaves eight bits.
        return Integer.toBinaryString(unsigned | 0x100).substring(1);
    }

    /**
     * Runs the whole compression pipeline: count bytes, build the tree,
     * derive the code table, then pack the encoded bits into bytes.
     *
     * @param bytes raw input; may be empty
     * @return the packed Huffman bit stream (empty array for empty input)
     */
    private static byte[] huffmanZip(byte[] bytes) {
        List<Node> nodes = getNodes(bytes);
        // Huffman tree
        Node huffmanTree = createHuffmanTree(nodes);
        // Huffman code table
        Map<Byte, String> codes = getCodes(huffmanTree);
        // packed Huffman bit stream
        return zip(bytes, codes);
    }

    /**
     * Encodes every input byte with its code and packs the resulting bit
     * string into bytes, eight bits at a time. The final byte holds the
     * remaining bits; their count is stored in {@link #lastByteBitCount}.
     *
     * @param bytes        raw input
     * @param huffmanCodes code table covering every byte value in {@code bytes}
     * @return the packed bytes
     * @throws IllegalArgumentException if a byte has no entry in the code table
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                // Appending null would silently add the text "null" to the bit string.
                throw new IllegalArgumentException("no Huffman code for byte " + b);
            }
            bits.append(code);
        }
        int total = bits.length();
        int remainder = total % 8;
        if (total == 0) {
            lastByteBitCount = 0;
        } else {
            lastByteBitCount = (remainder == 0) ? 8 : remainder;
        }
        byte[] packed = new byte[(total + 7) / 8];
        int index = 0;
        for (int i = 0; i < total; i += 8) {
            int end = Math.min(i + 8, total);
            // parseInt yields 0..255; the cast keeps the low eight bits
            packed[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return packed;
    }

    /**
     * Builds the code table for the given tree into {@link #huffmanCodes}.
     * A tree consisting of a single leaf (input with one distinct byte) gets
     * the one-bit code "0" so that the input can still be encoded.
     *
     * @param root root of the Huffman tree, or {@code null} for empty input
     * @return the shared code table (empty when {@code root} is {@code null})
     */
    private static Map<Byte, String> getCodes(Node root) {
        // Drop codes left over from a previous run.
        huffmanCodes.clear();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // The root itself is a leaf: there is no path, so assign a fixed code.
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        getCodes(root.left, "0", stringBuilder);
        getCodes(root.right, "1", stringBuilder);
        return huffmanCodes;
    }

    /**
     * Recursive step of the code-table construction.
     *
     * @param node          node being visited; {@code null} is ignored
     * @param code          "0" when {@code node} is a left child, "1" when a right child
     * @param stringBuilder path from the root to the parent of {@code node}
     */
    private static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // internal node: keep descending
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // leaf: the accumulated path is the code
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /** Prints the tree in pre-order, or a notice when the tree is empty. */
    private static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("哈夫曼樹為空");
        }
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lightest nodes.
     * The list is consumed in the process.
     *
     * @param nodes leaf nodes, one per distinct byte
     * @return the root, or {@code null} when {@code nodes} is empty
     */
    private static Node createHuffmanTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            // After sorting, the two lightest nodes sit at the front.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Counts how often each byte value occurs and wraps every (byte, count)
     * pair in a leaf node.
     *
     * @param bytes raw input
     * @return one leaf per distinct byte value, in the map's iteration order
     */
    private static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }
        List<Node> nodes = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }
}
/**
 * Node of a Huffman tree. Nodes order themselves by ascending weight so that
 * a sorted list starts with the lightest nodes.
 */
class Node implements Comparable<Node> {
    /** Byte value carried by this node; may be {@code null} (HuffmanCode passes null for merged parent nodes). */
    Byte data;
    /** Number of occurrences of the byte; for a merged parent, the sum of its children's weights. */
    int weight;
    /** Left child, or {@code null}. */
    Node left;
    /** Right child, or {@code null}. */
    Node right;

    /**
     * Creates a node without children.
     *
     * @param data   byte value, or {@code null}
     * @param weight weight of the node
     */
    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare with
     * @return negative, zero or positive when this weight is smaller than,
     *         equal to or greater than the other weight
     */
    @Override
    public int compareTo(Node o) {
        // Integer.compare avoids the overflow that "this.weight - o.weight" can hit.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}
測試

感謝
尚硅谷
以及勤勞的自己
關注公眾號: 歸子莫,獲取更多的資料,還有更長的學習計劃
