基于二叉树和数组实现限制长度的最优Huffman编码

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了基于二叉树和数组实现限制长度的最优Huffman编码相关的知识,希望对你有一定的参考价值。

具体介绍详见上篇博客:基于二叉树和双向链表实现限制长度的最优Huffman编码
基于数组和基于链表的实现方式在效率上有明显区别:
编码256个符号,符号权重为1...256,限制长度为16,循环编码1w次,Release模式下。基于链表的耗时为8972ms,基于数组的耗时为1793ms,速度是链表实现方式的5倍.
详细代码例如以下:
//Reference:A fast algorithm for optimal length-limited Huffman codes.pdf,http://pan.baidu.com/s/1o6E19Bs
//author:by Pan Yumin.2014-06-18
//with the method of BinaryTree and linked-list
#include <stdio.h>
#include <memory.h>
#include <malloc.h>

#define  MaxSymbols 256	//the Maximum Number of Symbols
#define  MaxHuffLen	16	//the Limited Length

typedef unsigned char boolean;
#ifndef FALSE			//in case these macros already exist
#define FALSE	0		//values of boolean
#endif

#ifndef TRUE
#define TRUE	1
#endif

typedef struct __Node{
	int width;
	int weight;
	int index;
	int depth;

	struct __Node *left;	//left child
	struct __Node *right;	//right child
}Node;

typedef struct __HuffTable{
	unsigned int index;
	unsigned int len;
	unsigned int code;
}HuffTable;

//Test memory leak
/*int g_malloc = 0,g_free = 0;

void* my_malloc(int size){
	g_malloc++;
	return malloc(size);
}
void my_free(void *ptr){
	if(ptr){
		g_free++;
		free(ptr);
		ptr = NULL;
	}
}
#define malloc my_malloc
#define free my_free*/

//Get the smallest term in the diadic expansion of X
int GetSmallestTerm(int X)
{
	int N=0;
	while((X & 0x01) == 0){
		X >>= 1;
		N++;
	}
	return 1<<N;
}
void RemoveNodeMark(Node *tree,unsigned char *Flag,int Symbols)
{
	if(tree->left == NULL && tree->right == NULL){
		Flag[tree->depth*Symbols+tree->index] = 0;	//set the nodemark zero
	}
	if(tree->left){
		RemoveNodeMark(tree->left,Flag,Symbols);
	}
	if(tree->right){
		RemoveNodeMark(tree->right,Flag,Symbols);
	}
}

void PrintHuffCode(HuffTable Huffcode)
{
	int i;
	for(i=Huffcode.len-1;i>=0;i--){
		printf("%d",(Huffcode.code>>i) & 0x01);
	}
}
void GenerateHuffmanCode(HuffTable *HuffCode,unsigned char *Flag,int L,int Symbols,int *SortIndex)
{
	char Code[17];
	int Pre_L = 0;
	int i=0,j=0;
	unsigned int codes[MaxHuffLen+2]={0},rank[MaxHuffLen+1] = {0};	//rank: the number of symbols in every length
	//find the first code
	for(i=0;i<Symbols;i++){
		for(j=0;j<L;j++){
			HuffCode[i].len += Flag[j*Symbols+i];
		}
		if(HuffCode[i].len != 0)
			rank[HuffCode[i].len]++;
		HuffCode[i].index = SortIndex[i];
	}

	for(i=0;i<=L;i++){
		codes[i+1] = (codes[i]+rank[i])<<1;
		rank[i] = 0;
	}

	//code
	for(i=0;i<Symbols;i++){
		HuffCode[i].code = codes[HuffCode[i].len] + rank[HuffCode[i].len]++;
	}
}
float BitsPerSymbol(HuffTable *HuffCode,int *weight,int Symbols,int WeightSum)
{
	float bitspersymbol = 0.0;
	int i;
	for(i=0;i<Symbols;i++){
		bitspersymbol += (float)HuffCode[i].len*weight[i];
	}
	return bitspersymbol/WeightSum;
}

//ascending order
void FreqSort(int *Freq,int *SortIndex,int Symbols)
{
	int i,j,tmp;
	for(i=0;i<Symbols;i++){
		for(j=i+1;j<Symbols;j++){
			if(Freq[i]>Freq[j]){
				tmp = Freq[i];
				Freq[i] = Freq[j];
				Freq[j] = tmp;

				tmp = SortIndex[i];
				SortIndex[i] = SortIndex[j];
				SortIndex[j] = tmp;
			}
		}
	}
}

//ascending order, quick sort
void QuickSort(int *arr, int *SortIndex,int startPos,int endPos)
{
	int i,j,key,index; 
	key=arr[startPos];
	index = SortIndex[startPos];
	i = startPos;	j = endPos; 
	while(i < j){
		while(arr[j]>=key && i<j)
			--j;
		arr[i]=arr[j];	SortIndex[i] = SortIndex[j];
		while(arr[i]<=key && i<j)
			++i; 
		arr[j]=arr[i];	SortIndex[j] = SortIndex[i];
	} 
	arr[i]=key;		SortIndex[i] = index;
	if(i-1 > startPos) 
		QuickSort(arr,SortIndex,startPos,i-1);
	if(endPos > i+1) 
		QuickSort(arr,SortIndex,i+1,endPos); 
}
int GenLenLimitedOptHuffCode(int *Freq,int Symbols)
{
	int i,j;
	unsigned char *Flag = NULL;		//record the state of the node
	unsigned int rank[MaxHuffLen];	
	Node *tree = NULL, *base = NULL, *left = NULL, *right = NULL;
	Node *start = NULL, *end = NULL, *Last = NULL;	//start:the first(min weight) node of 2*r,end:the last(max weight) node of 2*r,Last:the last node of array.
	Node *node = NULL;
	HuffTable HuffCode[MaxSymbols];
	float bitspersymbols = 0.0;
	int WeightSum = 0;
	int SortIndex[MaxSymbols];
	int X = (Symbols-1)<<MaxHuffLen;	//avoid float calculation
	int minwidth,r,weight;
	int r_Num = 0;
	
	if(Symbols > (1<<MaxHuffLen)){
		printf("Symbols > (1<<MaxHuffLen)\n");
		return -1;
	}

	for(i=0;i<MaxSymbols;i++){
		SortIndex[i] = i;
	}
	//FreqSort(Freq,SortIndex,Symbols);		//sort
	QuickSort(Freq,SortIndex,0,Symbols-1);		//sort

	for(i=0;i<Symbols;i++){	
		WeightSum += Freq[i];
	}
	tree = (Node *)malloc(Symbols*MaxHuffLen*2*sizeof(Node));
	memset(tree,0,Symbols*MaxHuffLen*2*sizeof(Node));		//2: for the optimize
	Flag = (unsigned char*)malloc(MaxHuffLen*Symbols*sizeof(unsigned char));
	memset(Flag,0x01,MaxHuffLen*Symbols*sizeof(unsigned char));	//mark every node 1
	memset(HuffCode,0,sizeof(HuffCode));

	for(i=0;i<MaxHuffLen;i++){
		for(j=0;j<Symbols;j++){
			tree[i*Symbols+j].depth = i;
			tree[i*Symbols+j].index = j;
			tree[i*Symbols+j].width = 1<<i;	//avoid float calculation
			tree[i*Symbols+j].weight = Freq[j];
		}
	}

	//start code
	base = tree;	Last = tree+MaxHuffLen*Symbols-1;
	while(X>0){
		minwidth = GetSmallestTerm(X);
		r = base->width;
		if(r > minwidth){	//there is no optimal solution.
			return -2;
		}	
		else if(r == minwidth){
			X -= minwidth;
			base++;
		}else{	//merge the smallest width and insert it into the original array
			if(r < (1<<(MaxHuffLen-1))){
				start = base+1;	r_Num = 1;
				//find start and end
				while(start->width < 2*r && start <= Last){
					r_Num++;
					start++;
				}
				end = start;
				while(end->width == 2*r && end <= Last){
					end++;
				}
				end--;
				//move back the (>=2*r)width node
				node = Last;	r_Num = r_Num/2;
				while(node >= start){
					*(node+r_Num) = *node;
					node--;
				}
				//package and merge
				node = start;
				start = start + r_Num;
				end = end + r_Num;
				for(i=0;i<r_Num;i++){
					left = base;	base++;
					right = base;	base++;
					weight = left->weight + right->weight;
					while(start <= end && start->weight <= weight){
						*node = *start;
						start++;
						node++;
					}
					node->weight = weight;	node->width = 2*r;
					node->left = left;		node->right = right;

					node++;
				}
				if(base->width == r){	//if r_Num is odd,remove the last r(width) Node.
					RemoveNodeMark(base,Flag,Symbols);
					base++;
				}
				Last += r_Num;
			}else{		//r >= (1<<(MaxHuffLen-1))
				while(base->width == r){
					left = base;	weight = base->weight;
					if((*(base+1)).width == r){
						base++;
						right = base;	weight += base->weight;
						base++;
						Last++;
						Last->weight = weight;	Last->width = 2*r;
						Last->left = left;		Last->right = right;
					}else{
						RemoveNodeMark(base,Flag,Symbols);
						base++;
					}
				}
			}
		} 
	}
	//output the HuffCode
	GenerateHuffmanCode(HuffCode,Flag,MaxHuffLen,Symbols,SortIndex);

	//print HuffCode
	for(i=0;i<Symbols;i++){
		printf("%03d weight:%04d Code:",HuffCode[i].index,Freq[i]);
		PrintHuffCode(HuffCode[i]);
		printf("\tCodeLen:%02d",HuffCode[i].len);
		printf("\n");
	}
	bitspersymbols = BitsPerSymbol(HuffCode,Freq,Symbols,WeightSum);
	printf("average code length:%f bits/symbol.\n",bitspersymbols);

	free(tree);	tree = NULL;
	free(Flag);	Flag = NULL;

	return 0;
}
#include <time.h>
int main()
{
// 	int Freq[MaxSymbols] = {1,25,3,4,9,6,4,6,26,15,234,4578};	//weight is not zero.
 	int Freq[MaxSymbols] = {10,6,2,1,1};	//weight is not zero.
 	GenLenLimitedOptHuffCode(Freq,5);		//5,12
 	return 0;
}
输出结果例如以下所看到的:
技术分享

以上是关于基于二叉树和数组实现限制长度的最优Huffman编码的主要内容,如果未能解决你的问题,请参考以下文章

Huffman树和Huffman编码

哈夫曼(Huffman)树和哈夫曼编码

构建Huffman哈夫曼最优二叉树,binarytree,Python

构建Huffman哈夫曼最优二叉树,binarytree,Python

构建Huffman哈夫曼最优二叉树,binarytree,Python

Huffman树的构造及编码与译码的实现