作者: 阙荣文(querw)
  这是一个基于LZARI算法的数据压缩类.Haruhiko Okumura 于1989年用C语言实现了这个算法.但是原来的实现用到了一些全局或静态的变量,在MFC下用起来很不方便.我把它改写成了一个C++类,使它可以方便地进行压缩和解压缩.更重要的是,我新增加了两个接口,使这个类可以压缩/解压缩一段内存缓冲区,而不仅仅是文件.
void Compress(const char *lpszInfile,const char *lpszOutfile);
void UnCompress(const char *lpszInfile,const char *lpszOutfile);
LZARI Lzari;
Lzari.Compress("show.bmp","show.liz");	//压缩文件 show.bmp 到 show.liz
// Lzari.UnCompress("show.liz","show.bmp"); // 解压缩文件 show.liz 到 show.bmp
void Compress(const BYTE *pInBuffer,int nInLength,const BYTE * &pOutBuffer ,int &nOutLength);
void UnCompress(const BYTE *pInBuffer,int nInLength,const BYTE * &pOutBuffer,int &nOutLength);
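LZARI Lzari;
const BYTE *pOutBuffer = NULL;
int nOutSize = 0;
char szInBuffer[] = "This is a class for compress and uncompress";
Lzari.Compress((const BYTE *)szInBuffer,(int)strlen(szInBuffer),pOutBuffer,nOutSize); // 压缩 szInBuffer,结果通过 pOutBuffer 和 nOutSize 返回
// 用pOutBuffer 做一些事情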
void Release();
若想让一个LZARI类实例既进行压缩操作又进行解压缩操作,请在后一个操作调用之前调用一下Release(); 如下所示:
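LZARI Lzari;
Lzari.Compress(pInBuffer,nInSize,pOutBuffer,nOutSize); //压缩pInBuffer
// 用pOutBuffer 做一些事情
Lzari.Release(); //在后一个操作之前调用Release()复位内部状态
Lzari.UnCompress(pInBuffer2,nInsize2,pOutBuffer2,nOutSize2); //解压缩pInBuffer2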
// ...
// 用pOutBuffer 做一些事情
Lzari.UnCompress(pOutBuffer,nOutSize,pOutBuffer2,nOutSize2); //解压缩第一次压缩的结果
LZARI Lzari;
LZARI UnLzari;
// ...

UnLzari.UnCompress(pOutBuffer,nOutSize,pOutBuffer2,nOutSize2); //解压缩第一次压缩的结果
// ...
#include <vector>.当然,这个类并不依赖于MFC,可以使用在任何C++程序中.
注:与算法有关的问题请不要问我,我也不知道 :) 其他问题欢迎指教 querw@sina.com

 LZARI.C -- A Data Compression Program
 (tab = 4 spaces)
 4/7/1989 Haruhiko Okumura
 Use, distribute, and modify this program freely.
 Please send me your improved versions.
  NIFTY-Serve PAF01022
  CompuServe 74050,1022

/********** Bit I/O **********/
//#pragma warning(disable:4786)
//#include <VECTOR>


/* LZSS dictionary parameters.
   The implementation wraps ring-buffer indices with "& (N - 1)", so N must
   stay a power of two. */
#define N   4096 /* size of ring buffer */
#define F     60 /* upper limit for match_length */
#define THRESHOLD 2   /* encode string into position and length
         if match_length is greater than this */
#define NIL   N /* index for root of binary search trees */
/********** Arithmetic Compression **********/

/*  If you are not familiar with arithmetic compression, you should read
  I. E. Witten, R. M. Neal, and J. G. Cleary,
   Communications of the ACM, Vol. 30, pp. 520-540 (1987),
 from which much have been borrowed.  */

/* Exponent used to derive Q1 below; StartDecode() preloads M + 2 bits. */
#define M   15

/* Q1 (= 2 to the M) must be sufficiently large, but not so
 large as the unsigned long 4 * Q1 * (Q1 - 1) overflows.  */

/* Q1..Q4 are multiples of Q1; the coder starts with low = 0 and high = Q4
   and renormalizes by comparing low/high against Q1, Q2 and Q3. */
#define Q1  (1UL << M)
#define Q2  (2 * Q1)
#define Q3  (3 * Q1)
#define Q4  (4 * Q1)
/* Once sym_cum[0] reaches this value UpdateModel() halves the frequencies. */
#define MAX_CUM (Q1 - 1)

/* Size of the character-code alphabet: codes below 256 are literal bytes,
   codes from 256 upward carry a match length (255 - THRESHOLD + length). */
#define N_CHAR  (256 - THRESHOLD + F)

class LZARI
 virtual ~LZARI();
 FILE  *infile, *outfile;
 unsigned long textsize;
 unsigned long codesize;
 unsigned long printcount;
 unsigned char  text_buf[N + F - 1]; /* ring buffer of size N,with extra F-1 bytes to facilitate string comparison */
 int match_position;
 int match_length;  /* of longest match.  These areset by the InsertNode() procedure. */
 int lson[N + 1];
 int rson[N + 257];
 int dad[N + 1];  /* left & right children &parents -- These constitute binary search trees. */

 /* character code = 0, 1, ..., N_CHAR - 1 */

 unsigned long low;
 unsigned long high;
 unsigned long value;
 int  shifts;  /* counts for magnifying low and high around Q2 */
 int  char_to_sym[N_CHAR];
 int sym_to_char[N_CHAR + 1];
 unsigned int sym_freq[N_CHAR + 1];  /* frequency for symbols */
 unsigned int sym_cum[N_CHAR + 1];   /* cumulative freq for symbols */
 unsigned int position_cum[N + 1];   /* cumulative freq for positions */

 // Compress in memory;
 bool m_bMem;

 std::vector<BYTE> m_OutBuffer;
 //BYTE *m_pOutBuffer;
 int m_nOutLength;
 //int m_nOutCur;

 const BYTE *m_pInBuffer;
 int m_nInLength;
 int m_nInCur;

 unsigned int  buffer_putbit, mask_putbit;
 unsigned int  buffer_getbit, mask_getbit;

 void Error(char *message);
 void PutBit(int bit);  /* Output one bit (bit = 0,1) */
 void FlushBitBuffer(void);  /* Send remaining bits */
 int GetBit(void);  /* Get one bit (0 or 1) */

/********** LZSS with multiple binary trees **********/

 void InitTree(void);  /* Initialize trees */
 void InsertNode(int r);
 void DeleteNode(int p);  /* Delete node p from tree */
 void StartModel(void); /* Initialize model */
 void UpdateModel(int sym);
 void Output(int bit);  /* Output 1 bit, followed by its complements */
 void EncodeChar(int ch);
 void EncodePosition(int position);
 void EncodeEnd(void);
 int BinarySearchSym(unsigned int x);
 int BinarySearchPos(unsigned int x);
 void StartDecode(void);
 int DecodeChar(void);
 int DecodePosition(void);

 void Encode(void);
 void Decode(void);

 void Compress(const char *lpszInfile,const char *lpszOutfile);
 void UnCompress(const char *lpszInfile,const char *lpszOutfile);

 void Compress(const BYTE *pInBuffer,int nInLength,const BYTE * &pOutBuffer ,int &nOutLength);
 void UnCompress(const BYTE *pInBuffer,int nInLength,const BYTE * &pOutBuffer,int &nOutLength);
 void Release();

 LZARI.C -- A Data Compression Program
 (tab = 4 spaces)
 4/7/1989 Haruhiko Okumura
 Use, distribute, and modify this program freely.
 Please send me your improved versions.
  NIFTY-Serve PAF01022
  CompuServe 74050,1022

 lzari.cpp -- A Data Compression Class
 created: 2004/10/04
 created: 4:10:2004   16:44
 file base: lzari
 file ext: cpp
 author: 阙荣文 (querw@sina.com)
 purpose: 如上所述,lzari.c提供了lzari压缩算法的实现,基于lzari.c我把它改写成了一个C++类.

#include "StdAfx.h"
//#include <stdio.h>
//#include <stdlib.h>
//#include <string.h>
//#include <ctype.h>
#include "Lzari.h"

 infile = NULL;
 outfile = NULL;

 textsize = 0;
 codesize = 0;
 printcount = 0;

 low = 0;
 high = Q4;
 value = 0;
 shifts = 0;/* counts for magnifying low and high around Q2 */
 m_bMem = FALSE;

 m_pInBuffer = NULL;
 m_nInLength = 0;
 m_nInCur = 0;

 //m_pOutBuffer = NULL;
 m_nOutLength = 0;
// m_nOutCur = 0;

 buffer_putbit = 0;
 mask_putbit = 128;

 buffer_getbit = 0;
 mask_getbit = 0;



/* Report a fatal coder error: print the message on its own line and abort
   the current operation by throwing the int value 1. */
void LZARI::Error(char *message)
{
    /* The escapes had been corrupted to "/n"; they are newlines. */
    printf("\n%s\n", message);
    int e = 1;
    throw e;
}

void LZARI::PutBit(int bit)  /* Output one bit (bit = 0,1) */
 if (bit) buffer_putbit |= mask_putbit;
 if ((mask_putbit >>= 1) == 0)
  if (!m_bMem)
   if (putc(buffer_putbit, outfile) == EOF) Error("Write Error");
   //if (m_nOutCur == m_nOutLength) Error("Write Error");
   //m_pOutBuffer[m_nOutCur++] = buffer;
  buffer_putbit = 0; 
  mask_putbit = 128; 

void LZARI::FlushBitBuffer(void)  /* Send remaining bits */
 int  i;
 for (i = 0; i < 7; i++) PutBit(0);

int LZARI::GetBit(void)  /* Get one bit (0 or 1) */

 if ((mask_getbit >>= 1) == 0)
  if (!m_bMem)
   buffer_getbit = getc(infile);
   buffer_getbit = m_pInBuffer[m_nInCur++];
  mask_getbit = 128;
 return ((buffer_getbit & mask_getbit) != 0);

/********** LZSS with multiple binary trees **********/

void LZARI::InitTree(void)  /* Initialize trees */
 int  i;

 /* For i = 0 to N - 1, rson[i] and lson[i] will be the right and
    left children of node i.  These nodes need not be initialized.
    Also, dad[i] is the parent of node i.  These are initialized to
    NIL (= N), which stands for 'not used.'
    For i = 0 to 255, rson[N + i + 1] is the root of the tree
    for strings that begin with character i.  These are initialized
    to NIL.  Note there are 256 trees. */

 for (i = N + 1; i <= N + 256; i++) rson[i] = NIL; /* root */
 for (i = 0; i < N; i++) dad[i] = NIL; /* node */

void LZARI::InsertNode(int r)
 /* Inserts string of length F, text_buf[r..r+F-1], into one of the
    trees (text_buf[r]'th tree) and returns the longest-match position
    and length via the global variables match_position and match_length.
    If match_length = F, then removes the old node in favor of the new
    one, because the old one will be deleted sooner.
    Note r plays double role, as tree node and position in buffer. */
 int  i, p, cmp, temp;
 unsigned char  *key;

 cmp = 1;  key = &text_buf[r];  p = N + 1 + key[0];
 rson[r] = lson[r] = NIL;  match_length = 0;
 for ( ; ; )
  if (cmp >= 0)
   if (rson[p] != NIL) p = rson[p];
   else {  rson[p] = r;  dad[r] = p;  return;  }
  } else
   if (lson[p] != NIL) p = lson[p];
   else {  lson[p] = r;  dad[r] = p;  return;  }
  for (i = 1; i < F; i++)
   if ((cmp = key[i] - text_buf[p + i]) != 0)  break;
  if (i > THRESHOLD)
   if (i > match_length)
    match_position = (r - p) & (N - 1);
    if ((match_length = i) >= F) break;
   } else if (i == match_length)
    if ((temp = (r - p) & (N - 1)) < match_position)
     match_position = temp;
 dad[r] = dad[p];  lson[r] = lson[p];  rson[r] = rson[p];
 dad[lson[p]] = r;  dad[rson[p]] = r;
 if (rson[dad[p]] == p) rson[dad[p]] = r;
 else                   lson[dad[p]] = r;
 dad[p] = NIL;  /* remove p */

void LZARI::DeleteNode(int p)  /* Delete node p from tree */
 int  q;
 if (dad[p] == NIL) return;  /* not in tree */
 if (rson[p] == NIL) q = lson[p];
 else if (lson[p] == NIL) q = rson[p];
  q = lson[p];
  if (rson[q] != NIL)
   do {  q = rson[q];  } while (rson[q] != NIL);
   rson[dad[q]] = lson[q];  dad[lson[q]] = dad[q];
   lson[q] = lson[p];  dad[lson[p]] = q;
  rson[q] = rson[p];  dad[rson[p]] = q;
 dad[q] = dad[p];
 if (rson[dad[p]] == p) rson[dad[p]] = q;
 else                   lson[dad[p]] = q;
 dad[p] = NIL;

/********** Arithmetic Compression **********/

/*  If you are not familiar with arithmetic compression, you should read
  I. E. Witten, R. M. Neal, and J. G. Cleary,
   Communications of the ACM, Vol. 30, pp. 520-540 (1987),
 from which much have been borrowed.  */

 /* character code = 0, 1, ..., N_CHAR - 1 */

void LZARI::StartModel(void)  /* Initialize model */
 int ch, sym, i;
 sym_cum[N_CHAR] = 0;
 for (sym = N_CHAR; sym >= 1; sym--)
  ch = sym - 1;
  char_to_sym[ch] = sym;  sym_to_char[sym] = ch;
  sym_freq[sym] = 1;
  sym_cum[sym - 1] = sym_cum[sym] + sym_freq[sym];
 sym_freq[0] = 0;  /* sentinel (!= sym_freq[1]) */
 position_cum[N] = 0;
 for (i = N; i >= 1; i--)
  position_cum[i - 1] = position_cum[i] + 10000 / (i + 200);
   /* empirical distribution function (quite tentative) */
   /* Please devise a better mechanism! */

void LZARI::UpdateModel(int sym)
 int i, c, ch_i, ch_sym;
 if (sym_cum[0] >= MAX_CUM)
  c = 0;
  for (i = N_CHAR; i > 0; i--)
   sym_cum[i] = c;
   c += (sym_freq[i] = (sym_freq[i] + 1) >> 1);
  sym_cum[0] = c;
 for (i = sym; sym_freq[i] == sym_freq[i - 1]; i--) ;
 if (i < sym)
  ch_i = sym_to_char[i];    ch_sym = sym_to_char[sym];
  sym_to_char[i] = ch_sym;  sym_to_char[sym] = ch_i;
  char_to_sym[ch_i] = sym;  char_to_sym[ch_sym] = i;
 while (--i >= 0) sym_cum[i]++;

void LZARI::Output(int bit)  /* Output 1 bit, followed by its complements */
 for ( ; shifts > 0; shifts--) PutBit(! bit);

void LZARI::EncodeChar(int ch)
 int  sym;
 unsigned long int  range;

 sym = char_to_sym[ch];
 range = high - low;
 high = low + (range * sym_cum[sym - 1]) / sym_cum[0];
 low +=       (range * sym_cum[sym    ]) / sym_cum[0];
 for ( ; ; )
  if (high <= Q2) Output(0);
  else if (low >= Q2)
   Output(1);  low -= Q2;  high -= Q2;
  else if (low >= Q1 && high <= Q3)
   shifts++;  low -= Q1;  high -= Q1;
  else break;
  low += low;
  high += high;

void LZARI::EncodePosition(int position)
 unsigned long int  range;

 range = high - low;
 high = low + (range * position_cum[position    ]) / position_cum[0];
 low +=       (range * position_cum[position + 1]) / position_cum[0];
 for ( ; ; )
  if (high <= Q2) Output(0);
  else if (low >= Q2)
   Output(1);  low -= Q2;  high -= Q2;
  else if (low >= Q1 && high <= Q3)
   shifts++;  low -= Q1;  high -= Q1;
  else break;
  low += low;
  high += high;

void LZARI::EncodeEnd(void)
 if (low < Q1) Output(0);  else Output(1);
 FlushBitBuffer();  /* flush bits remaining in buffer */

int LZARI::BinarySearchSym(unsigned int x)
 /* 1      if x >= sym_cum[1],
    N_CHAR if sym_cum[N_CHAR] > x,
    i such that sym_cum[i - 1] > x >= sym_cum[i] otherwise */
 int i, j, k;
 i = 1;  j = N_CHAR;
 while (i < j)
  k = (i + j) / 2;
  if (sym_cum[k] > x) i = k + 1;  else j = k;
 return i;

int LZARI::BinarySearchPos(unsigned int x)
 /* 0 if x >= position_cum[1],
    N - 1 if position_cum[N] > x,
    i such that position_cum[i] > x >= position_cum[i + 1] otherwise */
 int i, j, k;
 i = 1;  j = N;
 while (i < j)
  k = (i + j) / 2;
  if (position_cum[k] > x) i = k + 1;  else j = k;
 return i - 1;

void LZARI::StartDecode(void)
 int i;

 for (i = 0; i < M + 2; i++)
  value = 2 * value + GetBit();

int LZARI::DecodeChar(void)
 int  sym, ch;
 unsigned long int  range;
 range = high - low;
 sym = BinarySearchSym((unsigned int)
  (((value - low + 1) * sym_cum[0] - 1) / range));
 high = low + (range * sym_cum[sym - 1]) / sym_cum[0];
 low +=       (range * sym_cum[sym    ]) / sym_cum[0];
 for ( ; ; ) {
  if (low >= Q2) {
   value -= Q2;  low -= Q2;  high -= Q2;
  } else if (low >= Q1 && high <= Q3) {
   value -= Q1;  low -= Q1;  high -= Q1;
  } else if (high > Q2) break;
  low += low;  high += high;
  value = 2 * value + GetBit();
 ch = sym_to_char[sym];
 return ch;

int LZARI::DecodePosition(void)
 int position;
 unsigned long int  range;
 range = high - low;
 position = BinarySearchPos((unsigned int)
  (((value - low + 1) * position_cum[0] - 1) / range));
 high = low + (range * position_cum[position    ]) / position_cum[0];
 low +=       (range * position_cum[position + 1]) / position_cum[0];
 for ( ; ; ) {
  if (low >= Q2) {
   value -= Q2;  low -= Q2;  high -= Q2;
  } else if (low >= Q1 && high <= Q3) {
   value -= Q1;  low -= Q1;  high -= Q1;
  } else if (high > Q2) break;
  low += low;  high += high;
  value = 2 * value + GetBit();
 return position;

/********** Encode and Decode **********/

void LZARI::Encode(void)
 int  i, c, len, r, s, last_match_length;
  fseek(infile, 0L, SEEK_END);
  textsize = ftell(infile);
  if (fwrite(&textsize, sizeof textsize, 1, outfile) < 1)
   Error("Write Error");  /* output size of text */
  codesize += sizeof textsize;
  if (textsize == 0) return;
  textsize = 0;
  textsize = m_nInLength;
  m_OutBuffer.resize(sizeof textsize);
  memcpy(&m_OutBuffer[0],&textsize,sizeof textsize);
  //m_nOutCur += sizeof textsize;
  codesize += sizeof textsize;
  if(textsize == 0) return;
  m_nInCur = 0;
  textsize = 0;
 s = 0;  r = N - F;
 for (i = s; i < r; i++) text_buf[i] = ' ';
  for (len = 0; len < F && (c = getc(infile)) != EOF; len++) text_buf[r + len] = c;
  for (len = 0; len < F && m_nInCur < m_nInLength ; len++)
   c = m_pInBuffer[m_nInCur++];
   text_buf[r + len] = c;
 textsize = len;
 for (i = 1; i <= F; i++) InsertNode(r - i);
 do {
  if (match_length > len) match_length = len;
  if (match_length <= THRESHOLD)
   match_length = 1;  EncodeChar(text_buf[r]);
   EncodeChar(255 - THRESHOLD + match_length);
   EncodePosition(match_position - 1);
  last_match_length = match_length;
   for (i = 0; i < last_match_length && (c = getc(infile)) != EOF; i++)
    DeleteNode(s);  text_buf[s] = c;
    if (s < F - 1) text_buf[s + N] = c;
    s = (s + 1) & (N - 1);
    r = (r + 1) & (N - 1);
   for (i = 0; i < last_match_length && m_nInCur < m_nInLength ; i++)
    c = m_pInBuffer[m_nInCur++];
    text_buf[s] = c;
    if (s < F - 1) text_buf[s + N] = c;
    s = (s + 1) & (N - 1);
    r = (r + 1) & (N - 1);
  if ((textsize += i) > printcount)
   printf("%12ld/r", textsize);
   printcount += 1024;
  while (i++ < last_match_length)
   s = (s + 1) & (N - 1);
   r = (r + 1) & (N - 1);
   if (--len) InsertNode(r);
 } while (len > 0);

 printf("In : %lu bytes/n", textsize);
 printf("Out: %lu bytes/n", codesize);
 printf("Out/In: %.3f/n", (double)codesize / textsize);

void LZARI::Decode(void)
 int  i, j, k, r, c;
 unsigned long int  count;

 if (!m_bMem)
  if (fread(&textsize, sizeof textsize, 1, infile) < 1)
   Error("Read Error");  /* read size of text */
  if(m_nInLength < sizeof textsize)
   Error("Read Error");
  memcpy(&textsize,m_pInBuffer + m_nInCur,sizeof textsize);
  m_nOutLength = textsize;
  //m_nOutCur = 0;
  m_nInCur += sizeof textsize;
 if (textsize == 0) return;
 for (i = 0; i < N - F; i++) text_buf[i] = ' ';
 r = N - F;
 for (count = 0; count < textsize; )
  c = DecodeChar();
  if (c < 256)
    putc(c, outfile);
    //m_OutBuffer[m_nOutCur++] = c;
   text_buf[r++] = c;
   r &= (N - 1);
   i = (r - DecodePosition() - 1) & (N - 1);
   j = c - 255 + THRESHOLD;
   for (k = 0; k < j; k++)
    c = text_buf[(i + k) & (N - 1)];
     putc(c, outfile);
    // m_pOutBuffer[m_nOutCur++] = c;
     //m_nOutCur ++;
    text_buf[r++] = c;
    r &= (N - 1);
  if (count > printcount)
   printf("%12lu/r", count);
   printcount += 1024;

 printf("%12lu/n", count);

void LZARI::Compress(const char *lpszInfile,const char *lpszOutfile)
 m_bMem = FALSE;
 infile = fopen(lpszInfile,"rb");
 outfile = fopen(lpszOutfile,"wb");
 if(infile && outfile)
  infile = NULL;
  outfile = NULL;

void LZARI::UnCompress(const char *lpszInfile,const char *lpszOutfile)
 m_bMem = FALSE;

 infile = fopen(lpszInfile,"rb");
 outfile = fopen(lpszOutfile,"wb");
 if(infile && outfile)
  infile = NULL;
  outfile = NULL;

void LZARI::Compress(const BYTE *pInBuffer,int nInLength,const BYTE *&pOutBuffer ,int &nOutLength)
 m_pInBuffer = pInBuffer;
 m_nInLength = nInLength;
 m_nInCur = 0;

// m_nOutCur = 0;

 m_bMem = TRUE;
 pOutBuffer = &m_OutBuffer[0];
 nOutLength = m_OutBuffer.size();

void LZARI::UnCompress(const BYTE *pInBuffer,int nInLength,const BYTE *&pOutBuffer ,int &nOutLength)
 m_pInBuffer = pInBuffer;
 m_nInLength = nInLength;
 m_nInCur = 0;

 m_bMem = TRUE;
 pOutBuffer = &m_OutBuffer[0];
 nOutLength = m_OutBuffer.size();

void LZARI::Release()
  infile = NULL;
  outfile = NULL;
  textsize = 0;
  codesize = 0;
  printcount = 0;
  low = 0;
  high = Q4;
  value = 0;
  shifts = 0;
  m_bMem = FALSE;
  m_pInBuffer = NULL;
  m_nInLength = 0;
  m_nInCur = 0;
  m_nOutLength = 0;

  buffer_putbit = 0;
  mask_putbit = 128;
  buffer_getbit = 0;
  mask_getbit = 0;


