数据结构14-哈夫曼树 哈夫曼树(Huffman Tree)是由计算机科学家David A. Huffman于1952年提出的一种用于数据压缩的二叉树,它能有效地减少存储数据所需的空间。哈夫曼树广泛应用于无损数据压缩算法,如ZIP压缩、PNG图像格式、MP3音频格式等。哈夫曼编码是一种基于字符频率的变长编码,频率高的字符用较短的编码,频率低的字符用较长的编码,从而达到数据压缩的效果。
哈夫曼树的基本原理 哈夫曼树的基本思想是:根据输入数据中字符的频率,构建一棵最优的二叉树,树中的每个叶子节点代表一个字符,每个字符的编码通过从根节点到叶子节点的路径来表示。路径中的左子树表示0
,右子树表示1
。在哈夫曼树中,频率较高的字符会有较短的编码,频率较低的字符会有较长的编码。
哈夫曼树的构建步骤 构建哈夫曼树的过程一般分为以下几个步骤:
1. 统计字符频率 首先需要统计输入数据中每个字符出现的频率。频率高的字符会优先获得较短的编码。
2. 创建哈夫曼树节点 将每个字符及其频率作为一个单独的节点,将所有节点加入到一个优先队列(最小堆)中。队列中的节点会根据字符的频率进行排序,频率小的节点排在前面。
3. 构建哈夫曼树 每次从队列中取出两个频率最小的节点,合并成一个新节点。新节点的频率为两个子节点的频率之和,而其左右子节点分别是这两个最小频率节点。然后将新节点重新插入队列。这个过程重复进行,直到队列中只剩下一个节点,这个节点即为哈夫曼树的根节点。
4. 生成哈夫曼编码 从根节点出发,沿着哈夫曼树生成每个字符的编码。通常,左子树的边为0
,右子树的边为1
,字符的编码即为从根节点到该字符的路径。
哈夫曼树的性质 最优性 :哈夫曼树是一种贪心算法,其通过频率的优先级合并节点来保证编码的最优性。对于给定的一组字符及其频率,哈夫曼编码总是能够得到最小的平均编码长度。树的深度 :哈夫曼树的深度是非负整数,且树的深度越深,字符出现的频率越低。频率越高的字符一般在树的深度较浅的位置,因此它们的编码较短。无前缀性 :哈夫曼编码具有前缀性质,即不会有任何编码是另一个编码的前缀。这使得哈夫曼编码可以无歧义地解码。哈夫曼树的应用 哈夫曼树广泛应用于数据压缩领域,以下是一些典型的应用场景:
文件压缩 :如ZIP、RAR、7z等文件压缩格式通过哈夫曼编码实现数据压缩。图像压缩 :如PNG、JPEG图像格式的压缩采用了类似哈夫曼编码的技术。音频压缩 :MP3等音频格式也使用哈夫曼编码来压缩音频数据。代码模板: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 #include <iostream> using namespace std;typedef struct hafumantree { int weight; int parent; int leftchild; int rightchild; }; void selectmintwo (hafumantree*& HT, int n, int & s1, int & s2) { for (int j = 1 ;j <= n;j++) { if (HT[j].parent == 0 ) { if (s1 == -1 || HT[s1].weight > HT[j].weight) { s2 = s1; s1 = j; } else if (s2 == -1 || HT[s2].weight > HT[j].weight) { s2 = j; } } } } void Createhafumantree (hafumantree*& HT, int n) { if (n <= 1 )return ; int m = 2 * n - 1 ; HT = new hafumantree[m + 1 ]; for (int i = 1 ;i <= m;i++) { HT[i].leftchild = 0 ; HT[i].parent = 0 ; HT[i].rightchild = 0 ; HT[i].weight = 0 ; } for (int i = 1 ;i <= n;i++) { cin >> HT[i].weight; } for (int i = n + 1 ;i < m + 1 ;i++) { int s1 = -1 , s2 = -1 ; selectmintwo (HT, i - 1 , s1, s2); HT[i].leftchild = s1; HT[i].rightchild = s2; HT[i].weight = HT[s1].weight + HT[s2].weight; HT[s1].parent = i; HT[s2].parent = i; } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 #include <iostream> #include <string> #include <map> using namespace std;map<char , int >dataweight; map<char , string>datacode; map<string, char >codedatainformation; #define max_size 9999 string stargeinformation; typedef struct { int weight; int parent; int leftchild; int rightchild; }Huffmannode,*HuffmanTree; void getweight (string& app) { for (auto spp : app) { dataweight[spp]++; } } HuffmanTree CreateHuffmanTree (int length) { int totallength = length * 2 - 1 ; HuffmanTree tree = (HuffmanTree)malloc (sizeof (Huffmannode) * (totallength)); for (int i = 0 ;i < totallength;i++) { tree[i].weight = 0 ; tree[i].leftchild = -1 ; tree[i].rightchild = -1 ; tree[i].parent = -1 ; } int index = 0 ; for (auto it : dataweight) { tree[index++].weight = it.second; } for (int i = length;i < totallength;i++) { int minnumber1 = -1 ; int minnumber2 = -1 ; for (int j = 0 ;j < i;j++) { if (tree[j].parent == -1 ) { if (minnumber1 == -1 || tree[minnumber1].weight > tree[j].weight) { minnumber2 = minnumber1; minnumber1 = j; } else if (minnumber2 == -1 || tree[minnumber2].weight > tree[j].weight) { minnumber2 = j; } } } tree[i].weight = tree[minnumber1].weight + tree[minnumber2].weight; tree[i].leftchild = minnumber1; tree[i].rightchild = minnumber2; tree[minnumber1].parent = i; tree[minnumber2].parent = i; } return tree; } void codehafuman (HuffmanTree& tree) { int stroenumber = 0 ; for (auto app : dataweight) { char datachar = app.first; string codedata = "" ; int current = stroenumber; int parent = tree[current].parent; while (parent != -1 ) { if (tree[parent].leftchild == current) { codedata = "0" + codedata; }else { codedata = "1" + codedata; } current = parent; parent = tree[current].parent; } datacode[datachar] = codedata; stroenumber++; } } string encoding (string stargeinformation) { string codedata = "" ; for (auto app : stargeinformation) { codedata += datacode[app]; } return codedata; } string skillcodehafuman (string& codedata,HuffmanTree&tree) { for (auto app : datacode) { codedatainformation[app.second] = app.first; } string stringdata = "" ; string datanumber = "" ; int number = dataweight.size () * 2 - 2 ; for (auto it : codedata) { if (it == '0' ) { number = tree[number].leftchild; datanumber += it; } else { number = tree[number].rightchild; datanumber += it; } if (tree[number].leftchild == -1 && tree[number].rightchild == -1 ) { stringdata += codedatainformation[datanumber]; number = dataweight.size () * 2 - 2 ; datanumber = "" ; } } return stringdata; } int main () { cout << "请输入文章内容:" << endl; string line; while (true ) { getline (cin, line); if (line.empty ()) { break ; } stargeinformation += line + "\n" ; } getweight (stargeinformation); HuffmanTree tree = CreateHuffmanTree (dataweight.size ()); codehafuman (tree); string codedata = encoding (stargeinformation); cout << codedata << endl; cout << endl; string relute = skillcodehafuman (codedata, tree); cout << relute << endl; free (tree); return 0 ; }