文件压缩与解压

Posted ZDF0414

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了文件压缩与解压相关的知识,希望对你有一定的参考价值。

文件压缩与解压思想:

 (1)统计字符出现次数,结合最小堆的性质生成哈夫曼树;

 (2)对叶节点进行编码:从根结点出发,走向左孩子编0,走向右孩子编1;

 (3)读取文件,用各字符的哈夫曼编码代替原字符,并把编码按位拼成新的字节写入压缩文件,即压缩;

 (4)读取压缩文件,按位沿哈夫曼树从根结点走到叶结点,还原出相应字符,即解压。

例如,对以下数据生成哈夫曼树,以及产生相应的哈夫曼编码:


//自己写的最小堆(因为在加强巩固,所以没用STL里的)
#pragma once
#include<iostream>
#include<vector>
using namespace std;
// Default ordering for Heap: T must be pointer-like, and the pointee must expose
// a `_weight` member that supports operator<. Returns true when `left` has the
// smaller weight, so a Heap using it keeps the lightest element on top.
template<class T>
struct Less
{
	bool operator()(const T& left, const T& right) const
	{
		return left->_weight < right->_weight;
	}
};

// Binary heap stored in a std::vector.
// Compare()(a, b) == true means `a` must sit above `b`; with the default Less
// this is a min-heap keyed on `_weight`.
template<class T, class Compare = Less<T> >
class Heap
{
public:
	// Creates an empty heap.
	Heap()
	{}

	// Builds a heap from the first `size` elements of `v`, skipping every
	// element equal to the sentinel `valid` (the value that marks an unused slot).
	Heap(const T* v, int size, const T& valid)
	{
		for (int i = 0; i < size; i++)
		{
			if (v[i] != valid)
				_v.push_back(v[i]);
		}
		// Sift down every internal node, starting from the last parent.
		// For an empty vector the start index is -1 and the loop is skipped.
		const int count = static_cast<int>(_v.size());
		for (int root = (count - 2) / 2; root >= 0; root--)
		{
			AdjustDown(root);
		}
	}

	// Adds `value` and restores the heap order.
	void Insert(const T& value)
	{
		_v.push_back(value);
		AdjustUp(static_cast<int>(_v.size()) - 1);
	}

	// Removes the top element. Does nothing when the heap is empty.
	void Remove()
	{
		if (_v.empty())
			return;
		using std::swap;
		swap(_v.front(), _v.back());
		_v.pop_back();
		AdjustDown(0);
	}

	// Returns the top element.
	// Throws std::out_of_range (from vector::at) when the heap is empty.
	T& GetTop()
	{
		return _v.at(0);
	}

	const T& GetTop() const
	{
		return _v.at(0);
	}

	// True when the heap holds no elements.
	bool Empty() const
	{
		return _v.empty();
	}

	// Number of elements currently stored.
	int Size() const
	{
		return static_cast<int>(_v.size());
	}

protected:
	// Moves the element at index `root` down until neither child has to sit above it.
	void AdjustDown(int root)
	{
		using std::swap;
		Compare cmp;
		size_t parent = static_cast<size_t>(root);
		size_t child = parent * 2 + 1;
		while (child < _v.size())
		{
			// Pick whichever child must sit higher.
			if (child + 1 < _v.size() && cmp(_v[child + 1], _v[child]))
				++child;
			if (!cmp(_v[child], _v[parent]))
				break;
			swap(_v[child], _v[parent]);
			parent = child;
			child = parent * 2 + 1;
		}
	}

	// Moves the element at index `leaf` up while it has to sit above its parent.
	void AdjustUp(int leaf)
	{
		using std::swap;
		Compare cmp;
		while (leaf > 0)
		{
			const int parent = (leaf - 1) / 2;
			if (!cmp(_v[leaf], _v[parent]))
				break;
			swap(_v[leaf], _v[parent]);
			leaf = parent;
		}
	}

private:
	std::vector<T> _v;
};

//哈夫曼树的建立
#pragma once
#include<iostream>
#include<string>
#include"Heap.h"
using namespace std;
// One node of a Huffman tree.
// A node whose `_left` and `_right` are both null is a leaf; `_weight` carries
// the payload that the heap comparator orders nodes by.
template<class T>
struct HuffmanTreeNode
{
	HuffmanTreeNode<T>* _left;
	HuffmanTreeNode<T>* _right;
	T _weight;

	// Creates a childless node holding a copy of `weight`.
	HuffmanTreeNode(const T& weight)
		: _left(nullptr)
		, _right(nullptr)
		, _weight(weight)
	{}
};
template<class T>
class HuffmanTree

	typedef HuffmanTreeNode<T> Node;
public:
	HuffmanTree(const T*a, int size, T invaild)
	
		_CreatHuffmanTree(a, size, invaild);
	
	Node* GetRoot()
	
		return _root;
	
protected:
	void _CreatHuffmanTree(const T*a,int size,T invaild)
	
		Heap<Node*,Less<Node*>>hp;
		for (int i = 0; i < size; i++)
		
			if (a[i] != invaild)
				hp.Insert(new Node(a[i]));//建立小堆
		
		//当_v中只剩下一个数据时,哈弗曼树建立完成
		while (hp.Size()>1)
		
			Node* left = hp.GetTop();
			hp.Remove();
			Node* right = hp.GetTop();
			hp.Remove();
			Node*parent = new Node(left->_weight + right->_weight);
			parent->_left = left;
			parent->_right = right;
			hp.Insert(parent);
		
		_root = hp.GetTop();
	
private:
	Node* _root;
;

//文件压缩与解压
#pragma once
#include<cstdio>
#include<iostream>
#include<string>
using namespace std;
#include"HuffmanTree.h"
// Per-byte bookkeeping used by the compressor: the byte value, how many times
// it occurs, and its Huffman code as a string of '0'/'1' characters.
// The arithmetic and comparison operators look at `_count` only.
struct CharInfo
{
	unsigned char _ch;
	long _count;
	std::string _code;

	// Record with the given occurrence count; `_ch` starts at 0.
	CharInfo(const long count = 0)
		: _ch(0)
		, _count(count)
	{}

	// Record for byte `ch` with a zero occurrence count.
	CharInfo(const unsigned char ch)
		: _ch(ch)
		, _count(0)
	{}

	// Sum of the two occurrence counts.
	long operator+(const CharInfo& info) const
	{
		return _count + info._count;
	}

	bool operator!=(const CharInfo& info) const
	{
		return _count != info._count;
	}

	bool operator<(const CharInfo& info) const
	{
		return _count < info._count;
	}
};
class FileCompress

public:
	//对文件filename进行压缩
	void Compress(const char* filename)
	
		FILE* fread = fopen(filename, "rb");
		if (fread == NULL)
		
			cout << "打开文件失败..." << endl;
			return;
		
		//打开文件成功
		//1、字符与下标对应,填充结构体的_ch
		for (int i = 0; i < 256; i++)
		
			_Info[i]._ch = i;
		

		//2、统计各个字符出现的次数
		unsigned char ch = fgetc(fread);
		while (!feof(fread))
		
			_Info[ch]._count++;
			ch = fgetc(fread);
		

		//3、建立哈夫曼树
		HuffmanTree<CharInfo> hft(_Info, 256, CharInfo());

		//4、获取各个字符的哈夫曼编码
		//每次都从根节点开始,走到叶节点,即得到叶节点对应字符的编码
		HuffmanTreeNode<CharInfo>*root = hft.GetRoot();
		string code;
		GetHuffmanCodeOfChar(root, code);

	


		//5、从头开始读文件,把读取字符的编码重新组合写入压缩文件中
		fseek(fread, 0, SEEK_SET);
		//压缩产生的文件的名字
		string fcp = filename;
		fcp += ".compress";
		FILE* fwrite = fopen(fcp.c_str(), "wb");
		if (fwrite == NULL)
		
			cout << "compress 文件打开失败" << endl;
			return;
		

		//打开文件成功
		unsigned char data = 0;//写入压缩文件的字符
		int offset = 7;//偏移量
		ch = fgetc(fread);
		while (!feof(fread))
		
			const char* str = _Info[ch]._code.c_str();//获取当前字符的编码
			while (*str != '\\0')
			
				if (offset >= 0)
				
					data = data | ((*str - '0') << offset);
					offset--;
				
				if (offset < 0)
				
					fputc(data, fwrite);
					data = 0;
					offset = 7;
				
				str++;
			
			ch = fgetc(fread);
		
		fputc(data, fwrite);
		WriteConfig(filename);
		fclose(fread);
		fclose(fwrite);
	
	void UnCompress(const char* filename)
	
		string fcp = filename;
		fcp += ".compress";
		FILE* fread = fopen(fcp.c_str(), "rb");
		if (fread == NULL)
		
			cout << "打开文件失败..." << endl;
			return;
		
		//打开文件成功
		//3、建立哈夫曼树
		CharInfo info[256];
		ReadConfig(filename, info);
		HuffmanTree<CharInfo> hft(info, 256, CharInfo());

		string fucp = filename;
		fucp += ".uncompress";
		FILE* fwrite = fopen(fucp.c_str(), "wb");

		HuffmanTreeNode<CharInfo>*root = hft.GetRoot();
		HuffmanTreeNode<CharInfo>*cur = root;//每次都要从根开始读,读到叶节点即可获取一个原字符
		long DataTotal = (root->_weight)._count;//原文件中的字符总数
		unsigned char ch = fgetc(fread);
		while (DataTotal)
		
			int tmp = 1;
			int offset = 7;
			while (offset >= 0)
			
				if (ch&(1 << offset))//检验位是否为0
				
					cur = cur->_right;
					offset--;
				
				else
				
					cur = cur->_left;
					offset--;
				
				if (cur->_left == NULL&&cur->_right == NULL)
				
					unsigned char wch = cur->_weight._ch;
					fputc(wch, fwrite);
					cur = root;
					DataTotal--;
					//最后一个字符的编码在最后两个字节当中的情况
					if (!DataTotal)  
					
						break;
					
				
			
			ch = fgetc(fread);
		
		fclose(fread);
		fclose(fwrite);
	
protected:
	void WriteConfig(const char*filename)
	
		string Config = filename;
		Config += "config";
		FILE* fwrite = fopen(Config.c_str(), "wb");
		if (fwrite == NULL)
		
			cout << "打开文件失败" << endl;
			return;
		
		//打开成功
		for (int i = 0; i < 256; i++)
		
			if (_Info[i] ._count)
			
				fputc(_Info[i]._ch, fwrite);
				fputc(',', fwrite);
				char count[100];
				//参数说明:1.要转换的值;2.结果存放区;3.进制
				_itoa(_Info[i]._count, count, 10);
				fputs(count, fwrite);
				fputc(',', fwrite);
				fputs(_Info[i]._code.c_str(), fwrite);
				fputc('\\n', fwrite);
			
		
		fclose(fwrite);
	
	void ReadConfig(const char*filename,CharInfo *info)
	
		string Config = filename;
		Config += "config";
		FILE* fread = fopen(Config.c_str(), "rb");
		if (fread == NULL)
		
			cout << "打开文件失败" << endl;
			return;
		
		//打开成功
		char str[100];
		while (fgets(str, 100, fread))
		
			char*pstr = str;
			unsigned char ch = (unsigned char)*pstr;//得到字符
			info[ch]._ch = ch;
			if (ch == '\\n')
			
				fgets(str, 100, fread);
				pstr=str;
				pstr++;
			
			else
				pstr+=2;//跳过分隔符(逗号)
			//得到_count的字符串形式
			long count = 0;
			while (*pstr&&*pstr!=',')
			
				count *= 10;
				count += *pstr - '0';
				pstr++;
			
			info[ch]._count = count;//得到出现次数
			pstr++;
			string code(pstr);
			info[ch]._code = code;
		
	
	void GetHuffmanCodeOfChar(HuffmanTreeNode<CharInfo>*root, string& code)
	
		if (root == NULL)
			return;
		if (root->_left == NULL&&root->_right == NULL)
		
			unsigned char ch = root->_weight._ch;
			_Info[ch]._code = code;
			return;
		
		GetHuffmanCodeOfChar(root->_left, code + '0');//左边编码为0
		GetHuffmanCodeOfChar(root->_right, code + '1');//右边编码为1
	

private:
	CharInfo _Info[256];//结构体数组
;


以上是关于文件压缩与解压的主要内容,如果未能解决你的问题,请参考以下文章

(转)使用 linux tar 命令压缩与解压文件

linux系统常见压缩与解压

上传一个压缩文件并解压,同时保持原目录不变

Linux学习 - 压缩解压命令

linux十九压缩解压

tar与压缩详解