霍夫曼编码(Huffman Code) 简单模拟
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了霍夫曼编码(Huffman Code) 简单模拟相关的知识,希望对你有一定的参考价值。
这周的数据结构实验作业,刚好被抽到号要上交,于是乎在宿舍写了一晚上。
若有错漏之处请斧正!
关于建树的参考博文:http://blog.csdn.net/xgf415/article/details/52628073
实验要求如下:
编译器:VS 2015
运行环境: WIN 10
代码如下:
hfmTree.h
1 #include <string> 2 #include "MinHeap.h" 3 #include <stack> 4 #include <sstream> 5 using namespace std; 6 #pragma once 7 8 9 const char EndTag = ‘#‘; //读写文件标识用,用来标记NULL孩子指针 10 const char NoneTag = ‘*‘; //建树识别用,非字符结点 11 class HuffmanTree; //前置声明 12 class Node 13 { 14 private: 15 char ch; 16 float wei; 17 string code; 18 Node *lc, *rc; 19 20 friend class HuffmanTree; 21 public : 22 Node() : ch(NoneTag),wei(0),code(),lc(NULL),rc(NULL){} 23 Node(char c, float w) { 24 ch = c; 25 wei= w; 26 lc = NULL; 27 rc = NULL; 28 } 29 30 float getWeight() { return wei; } 31 char getChar() { return ch; } 32 string getCode() { return code; } 33 Node* getRightChild() const { return rc; } 34 Node* getLeftChild() const { return lc; } 35 void setCodes(const string &s) { code = s; } 36 void set(const Node& l, const Node & r) { lc = new Node(l); rc = new Node(r); } 37 void clone(Node &n) { 38 ch = n.getChar(); 39 wei = n.getWeight(); 40 code = n.getCode(); 41 lc = n.getLeftChild(); 42 rc = n.getRightChild(); 43 } 44 45 bool hasChild() 46 { 47 return rc != NULL && lc != NULL; 48 } 49 50 Node &operator = (const Node &n) { 51 ch = n.ch; 52 wei = n.wei; 53 code = n.code; 54 lc = n.lc; 55 rc = n.rc; 56 return *this; 57 } 58 59 bool operator <=(Node &R) { 60 return wei <= R.wei; 61 } 62 63 bool operator >(Node &R) { 64 return wei > R.wei; 65 } 66 67 bool operator < (Node &R) { 68 return wei < R.wei; 69 } 70 71 friend ostream & operator << (ostream &out, const Node& n) { 72 out << "Char:" << n.ch << ‘\\t‘ << "Weight:" << n.wei << endl; 73 return out; 74 } 75 76 }; 77 78 class HuffmanTree 79 { 80 private: 81 int size; //结点数,包括空格结点 82 Node *root; 83 bool has_EndTag; 84 85 float averageLen; 86 public: 87 HuffmanTree() { 88 size = 0; 89 has_EndTag = false; 90 root = NULL; 91 } 92 93 //建立哈夫曼树 94 HuffmanTree(MinHeap<Node> &mh) 95 { 96 has_EndTag = false; 97 Node first, second; 98 while (mh.getSize() != 1) 99 { 100 mh.RemoveMin(first); 101 mh.RemoveMin(second); 102 Node h(NoneTag, first.getWeight() + 
second.getWeight()); 103 h.set(first, second); 104 mh.Insert(h); 105 } 106 mh.RemoveMin(first); 107 root = new Node(first); 108 } 109 110 void BuildTree(MinHeap<Node> &mh) 111 { 112 if (root != NULL) DeleteTree(root); 113 has_EndTag = false; 114 Node first, second; 115 while (mh.getSize() != 1) 116 { 117 mh.RemoveMin(first); 118 mh.RemoveMin(second); 119 Node h(NoneTag, first.getWeight() + second.getWeight()); 120 h.set(first, second); 121 mh.Insert(h); 122 } 123 mh.RemoveMin(first); 124 root = new Node(first); 125 } 126 127 void DeleteTree(Node *t) //删除以t为根的子树 128 { 129 if (t == NULL) 130 return; 131 else 132 { 133 Node *p1 = t->lc; 134 Node *p2 = t->rc; 135 if (p1 != NULL) DeleteTree(p1); 136 if (p2 != NULL) DeleteTree(p2); 137 delete t; 138 } 139 } 140 void Initialize() 141 { 142 string s = ""; 143 Initialization(root, s); 144 } 145 146 void Initialization(Node *n,string s) //初始化编码表 147 { 148 if (n == NULL) return; 149 n->code = s; 150 Initialization(n->lc, s + ‘0‘); 151 Initialization(n->rc, s + ‘1‘); 152 } 153 154 void addTag(Node *p) //添加结尾标志的递归函数(方便读写文件) 155 { 156 if (p->lc == NULL) 157 { 158 Node *t = new Node(EndTag, 0); 159 p->lc = t; 160 } 161 else addTag(p->lc); 162 if (p->rc == NULL) 163 { 164 Node *t = new Node(EndTag, 0); 165 p->rc = t; 166 } 167 else addTag(p->rc); 168 } 169 170 void addLeafTag() //给Huffman树添加叶子结点的空子女标志结点#,方便文件读写 171 { 172 addTag(root); 173 has_EndTag = true; 174 } 175 176 void delTag(Node *p) //删除叶子结点的空子女标志结点的递归函数 177 { 178 if (p == NULL) 179 return; 180 if (p->ch == EndTag) 181 delete p; 182 if (p->rc != NULL) 183 delTag(p->rc); 184 if (p->lc != NULL) 185 delTag(p->lc); 186 } 187 188 void delLeafTag() //删除叶子结点的空子女标志结点 189 { 190 delTag(root); 191 has_EndTag = false; 192 } 193 194 void SaveTable(ostream &os) //储存编码表到文件(非递归前序遍历) 195 { 196 197 if (!has_EndTag) 198 addLeafTag(); 199 stack<Node *> S; 200 Node *p = root; 201 S.push(p); 202 while (!S.empty()) 203 { 204 p = S.top(); S.pop(); 205 Node temp = *p; 206 os << temp.ch << ‘ ‘ << 
temp.wei <<‘ ‘ << temp.code << endl; 207 if (p->lc != NULL) S.push(p->lc); 208 if (p->rc != NULL) S.push(p->rc); 209 } 210 } 211 212 void ReadTable(istream &is) //读取编码表 213 { 214 if (root != NULL) DeleteTree(root); 215 ReadTable(is, root); 216 if (root == NULL) 217 cout << "error !读取文件遇到问题!" << endl; 218 } 219 220 void ReadTable(istream &is, Node *&dest) //读取编码表的递归函数 221 { 222 if (!is.eof()) 223 { 224 string line; 225 getline(is, line); 226 Node temp; 227 stringstream ss(line); 228 if (line[0] == ‘ ‘) 229 { 230 temp.ch = ‘ ‘; 231 ss >> temp.wei >> temp.code; 232 } 233 else 234 ss >> temp.ch >> temp.wei >> temp.code; 235 if (temp.ch == EndTag) 236 { 237 dest = NULL; 238 return; 239 } 240 dest = new Node(temp); 241 ReadTable(is, dest->lc); 242 ReadTable(is, dest->rc); 243 } 244 } 245 246 void ShowTree() 247 { 248 Show(root); 249 averageLen = averageLen / size; 250 cout << "平均编码长度:" << averageLen << endl; 251 } 252 253 void Show(Node *n) //前序遍历输出编码结果 254 { 255 if (n == NULL) return; 256 if (n->ch != NoneTag) 257 { 258 cout << n->ch << ":"; 259 cout <<n->code << endl; 260 averageLen += n->code.length(); 261 size++; 262 } 263 Show(n->lc); 264 Show(n->rc); 265 } 266 267 string getCode(char c) //给字符c编码,返回c的编码 268 { 269 Node *p = SearchChar(root,c); 270 if (p == NULL) 271 { 272 cout << "找不到" << c << "对应的编码" << endl; 273 exit(1); 274 } 275 else 276 return p->code; 277 } 278 279 Node *SearchChar(Node *p,char c) //查找字符为c的结点递归函数 280 { 281 if (p->ch == c) 282 return p; 283 else 284 { 285 Node *cur = NULL; 286 if (p->rc != NULL) 287 { 288 Node *rc = SearchChar(p->rc, c); 289 if (rc != NULL) cur = rc; 290 } 291 if (p->lc != NULL) 292 { 293 Node *lc = SearchChar(p->lc, c); 294 if (lc != NULL) cur = lc; 295 } 296 return cur; 297 } 298 } 299 300 char getChar(string c) //解码,返回编码c对应的字符 301 { 302 Node *p = SearchCode(root, c); 303 if (p == NULL) { cout << "解码失败,该字符不存在" << endl; return NoneTag; } 304 else return p->ch; 305 } 306 307 Node *SearchCode(Node *p, string c) //查找编码为c的对应字符结点 
308 { 309 if (p->code == c) 310 return p; 311 else 312 { 313 Node *cur = NULL; 314 if (p->rc != NULL) 315 { 316 Node *rc = SearchCode(p->rc, c); 317 if (rc != NULL) cur = rc; 318 } 319 if (p->lc != NULL) 320 { 321 Node *lc = SearchCode(p->lc, c); 322 if (lc != NULL) cur = lc; 323 } 324 return cur; 325 } 326 } 327 };
建树要用到优先级队列,用最小堆模板类实现,代码如下:
MinHeap.h
#pragma once

#include <cstdlib>
#include <iostream>

#define DefaultSize 10

// Array-backed binary min-heap.
//
// Requirements on E (all used below): default-constructible, copy-assignable,
// and comparable with operator<= and operator>. showAll() additionally needs
// operator<< for std::ostream.
//
// The heap grows automatically when full, so Insert() never fails. Copying a
// MinHeap is disabled because it owns a raw array (an implicit copy would
// delete the same array twice).
template<class E>
class MinHeap
{
private:
    E *heap;          // owned storage, maxHeapSize elements
    int currentSize;  // number of elements currently stored
    int maxHeapSize;  // current capacity (always >= DefaultSize)

    void siftDown(int start, int m);
    void siftUp(int start);
    void grow();

    // Not copyable: declared but intentionally left undefined.
    MinHeap(const MinHeap &);
    MinHeap &operator=(const MinHeap &);

public:
    // Creates an empty heap with capacity max(sz, DefaultSize).
    MinHeap(int sz = DefaultSize);

    // Creates a heap holding copies of arr[0..n-1]; a negative n is treated
    // as 0.
    MinHeap(E arr[], int n);

    ~MinHeap() { delete[] heap; }

    // Adds x; always returns true (capacity is extended when needed).
    bool Insert(const E &x);

    // Removes the smallest element into x. Returns false (and prints
    // "Heap empty!") when the heap is empty, leaving x untouched.
    bool RemoveMin(E &x);

    bool IsEmpty() const { return currentSize == 0; }

    // True when the current capacity is used up; the next Insert() will
    // enlarge the storage.
    bool IsFull() const { return currentSize == maxHeapSize; }

    void MakeEmpty() { currentSize = 0; }

    int getSize() const { return currentSize; }

    // Prints the elements in storage order (not sorted order).
    void showAll()
    {
        for (int i = 0; i < currentSize; ++i)
            std::cout << heap[i];
    }
};

template<class E>
MinHeap<E>::MinHeap(int sz)
    : heap(NULL), currentSize(0), maxHeapSize((DefaultSize < sz) ? sz : DefaultSize)
{
    // new[] throws std::bad_alloc on failure, so no NULL check is needed.
    heap = new E[maxHeapSize];
}

template<class E>
MinHeap<E>::MinHeap(E arr[], int n)
    : heap(NULL), currentSize(0), maxHeapSize((DefaultSize < n) ? n : DefaultSize)
{
    heap = new E[maxHeapSize];
    if (n < 0)
        n = 0;
    for (int i = 0; i < n; ++i)
        heap[i] = arr[i];
    currentSize = n;

    // Heapify bottom-up, starting from the last node that has a child.
    for (int pos = (currentSize - 2) / 2; pos >= 0; --pos)
        siftDown(pos, currentSize - 1);
}

// Doubles the capacity, preserving the stored elements.
template<class E>
void MinHeap<E>::grow()
{
    const int newCapacity = maxHeapSize * 2;
    E *bigger = new E[newCapacity];
    for (int i = 0; i < currentSize; ++i)
        bigger[i] = heap[i];
    delete[] heap;
    heap = bigger;
    maxHeapSize = newCapacity;
}

// Moves heap[start] down until neither child in [start, m] is smaller.
template<class E>
void MinHeap<E>::siftDown(int start, int m)
{
    if (m <= start)
        return;
    int parent = start;
    int child = 2 * parent + 1;
    E moving = heap[parent];
    while (child <= m)
    {
        // Pick the smaller of the two children.
        if (child < m && heap[child] > heap[child + 1])
            ++child;
        if (moving <= heap[child])
            break;
        heap[parent] = heap[child];
        parent = child;
        child = 2 * child + 1;
    }
    heap[parent] = moving;
}

// Moves heap[start] up until its parent is not larger.
template<class E>
void MinHeap<E>::siftUp(int start)
{
    int child = start;
    int parent = (child - 1) / 2;
    E moving = heap[child];
    while (child > 0)
    {
        if (heap[parent] <= moving)
            break;
        heap[child] = heap[parent];
        child = parent;
        parent = (parent - 1) / 2;
    }
    heap[child] = moving;
}

template<class E>
bool MinHeap<E>::Insert(const E &x)
{
    if (currentSize == maxHeapSize)
        grow(); // the original terminated the program here
    heap[currentSize] = x;
    siftUp(currentSize);
    currentSize++;
    return true;
}

template<class E>
bool MinHeap<E>::RemoveMin(E &x)
{
    if (currentSize == 0)
    {
        std::cout << "Heap empty!" << std::endl;
        return false;
    }
    x = heap[0];
    heap[0] = heap[currentSize - 1];
    currentSize--;
    siftDown(0, currentSize - 1);
    return true;
}
下面是main.cpp
1 #include <iostream> 2 #include <fstream> 3 #include "hfmTree.h" 4 #define MaxCh 50 5 using namespace std; 6 const char * TreeFile = "hfmTree.txt"; 7 const char * ToBeTran = "ToBeTran.txt"; 8 const char * CodeFile = "CodeFile.txt"; 9 const char * TextFile = "TextFile.txt"; 10 11 void showMenu() 12 { 13 cout << "\\t\\tI:初始化(Initialization)" << endl; 14 cout << "\\t\\tE:编码(Encoding)" << endl; 15 cout << "\\t\\tD:译码(Decoding)" << endl; 16 cout << "\\t\\tW: 重新写入正文" << endl; 17 cout << "\\t\\tQ:退出" << endl; 18 cout << endl; 19 } 20 21 void eatLine() 22 { 23 string line; 24 getline(cin,line); 25 } 26 27 HuffmanTree hfmTree; 28 bool has_init = false; 29 void Initialization() 30 { 31 MinHeap<Node> mh(MaxCh); 32 int n; 33 cout << "请输入字符集中字符个数:"; 34 cin >> n; 35 eatLine(); 36 while (n--) 37 { 38 char ch; 39 float wei; 40 cout << "输入编码字符:"; 41 ch = getchar(); 42 eatLine(); 43 cout << "输入权值:"; 44 cin >> wei; 45 eatLine(); 46 Node t(ch, wei); 47 mh.Insert(t); 48 cout << t; 49 } 50 hfmTree.BuildTree(mh); 51 hfmTree.Initialize(); 52 cout << "初始化编码表结果:" << endl; 53 hfmTree.ShowTree(); 54 ofstream fout(TreeFile); 55 hfmTree.SaveTable(fout); 56 cout << "写入文件成功!" << endl; 57 has_init = true; 58 fout.close(); 59 } 60 61 void ReadTreeFile() 62 { 63 ifstream fin(TreeFile, ios::app); 64 hfmTree.ReadTable(fin); 65 hfmTree.ShowTree(); 66 fin.close(); 67 has_init = true; 68 } 69 70 void WriteText() 71 { 72 ofstream fout(ToBeTran); 73 if (!fout.is_open()) 74 { 75 cout << "打开文件" << ToBeTran << "失败" << endl; 76 return; 77 } 78 string text; 79 eatLine(); 80 cout << "请输入需要编码的文本:" << endl; 81 getline(cin, text); 82 fout << text; 83 cout << "写入原始文本成功!" 
<< endl; 84 fout.close(); 85 } 86 87 void Encoding() 88 { 89 if (has_init == false) 90 ReadTreeFile(); 91 92 ifstream fin(ToBeTran); 93 if (!fin.is_open()) 94 { 95 cout << "打开文件" << ToBeTran << "失败" << endl; 96 return; 97 } 98 99 string text; 100 getline(fin, text); 101 cout << "将要给下述文本编码:" << endl; 102 cout << text << endl; 103 104 int len = text.length(); 105 cout << "length:" << len << endl; 106 107 string code; 108 for (int i = 0; i < len; ++i) 109 { 110 string c = hfmTree.getCode(text[i]); 111 code += c; 112 } 113 cout << "编码结果为:" << endl; 114 cout << code << endl; 115 ofstream fout(CodeFile); 116 if (!fout.is_open()) 117 { 118 cout << "打开文件" << CodeFile << "失败" << endl; 119 return; 120 } 121 fout << code; 122 cout << "编码成功!" << endl; 123 fout.close(); 124 fin.close(); 125 } 126 127 void Decoding() 128 { 129 if (has_init == false) 130 ReadTreeFile(); 131 ifstream fin(CodeFile); 132 if (!fin.is_open()) 133 { 134 cout << "打开文件" << ToBeTran << "失败" << endl; 135 return; 136 } 137 string code; 138 fin >> code; 139 cout << "将要给下述编码进行解码:" << endl; 140 cout << code << endl; 141 string text; 142 int len = code.length(); 143 string codenow; 144 for (int i = 0; i < len; ++i) 145 { 146 codenow += code[i]; 147 char temp = hfmTree.getChar(codenow); 148 if (temp != NoneTag) { text += temp; codenow.clear(); } 149 } 150 cout << "解码结果为:" << endl; 151 cout << text <<endl; 152 ofstream fout(TextFile); 153 fout << text; 154 155 fin.close(); 156 fout.close(); 157 } 158 159 int main() 160 { 161 cout << "请输入字符进行下述操作:" << endl << endl; 162 while (1) 163 { 164 char tag; 165 showMenu(); 166 cin >> tag; 167 switch (tag) 168 { 169 case ‘I‘: 170 Initialization(); 171 break; 172 case ‘E‘: 173 Encoding(); 174 break; 175 case ‘D‘: 176 Decoding(); 177 break; 178 case ‘Q‘: 179 return 0; 180 case ‘W‘: 181 WriteText(); 182 break; 183 default: 184 cout << "请输入正确的字符!" << endl; 185 cin >> tag; 186 break; 187 } 188 } 189 190 return 0; 191 }
以上是关于霍夫曼编码(Huffman Code)简单模拟的主要内容。如果未能解决你的问题,请参考以下文章:
学习数据结构笔记(10) --- [赫夫曼树(Huffman Tree)与赫夫曼编码(Huffman coding)]
【编码译码】基于 Matlab GUI 的霍夫曼(Huffman)编码译码【含 Matlab 源码 1976 期】