算法系列之使用赫夫曼编码的实战应用对文件进行压缩解压缩

Posted Roninaxious

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了算法系列之使用赫夫曼编码的实战应用对文件进行压缩解压缩相关的知识,希望对你有一定的参考价值。

1.首先在准备一张图片

2.测试压缩效果



3.测试解压缩效果

将桌面a.jpg删除



4.源代码

package com.zsh.algorithm.tree;

import java.io.*;
import java.util.*;

/**
 * @author:Ronin
 * @since:2021/9/29
 * @email:1817937322@qq.com
 */

public class HuffmanEncrypt {
    public static void main(String[] args) {
//        String str = "i love china china china oh";
//        byte[] bytes = huffmanZip(str.getBytes());
//        System.out.println(Arrays.toString(bytes));
//        String s = calHuffmanZipRatio(str.getBytes(), bytes);
//        System.out.println("压缩率为:" + s);
//        String s1 = byteToBinaryString(false, (byte) -1);
//        System.out.println("测试-1的补码为:"+ s1);
//        decode(huffmanCodesMap, bytes);
//        System.out.println("以下是文件压缩的测试----------------");
 //       fileEncode("C:\\\\Users\\\\18179\\\\Desktop\\\\a.jpg", "C:\\\\Users\\\\18179\\\\Desktop\\\\a.zip");
        fileDecode("C:\\\\Users\\\\18179\\\\Desktop\\\\a.zip", "C:\\\\Users\\\\18179\\\\Desktop\\\\a.jpg");
    }

    //用来存储转换二进制字符串集合
    private static StringBuilder stringBuilder = new StringBuilder();
    //哈夫曼编码表
    private static Map<Byte, String> huffmanCodesMap = new HashMap<>();

//---------------------------------------以下是压缩相关函数------------------------------------------//
    /**
     * 赫夫曼编码封装方法
     *
     * @param bytes
     * @return
     */
    public static byte[] huffmanZip(byte[] bytes) {
        //1.计算字符串中每个字符出现的次数,并字符、次数封装到EncryptNode结点中,将其加入到list集合中
        List<EncryptNode> encryptNodeList = calByteNums(bytes);
        //outputList(encryptNodeList);
        //2.构建哈夫曼树
        EncryptNode root = buildHuffmanTree(encryptNodeList);
        // preErgodic(root);
        //3.生成赫夫曼编码表
        Map<Byte, String> huffmanCodes = getHuffmanCodes(root);
        //System.out.println(huffmanCodes);
        //4.转换为赫夫曼编码字节表
        return zip(bytes, huffmanCodes);
    }

    /**
     * 计算字符串中的每个字符的长度
     *
     * @param bytes 待计算的字符串
     * @return 返回list集合
     */
    public static List<EncryptNode> calByteNums(byte[] bytes) {
        Map<Byte, Integer> map = new HashMap<>();
        byte[] charArray = bytes;
        for (byte c : charArray) {
            if (map.containsKey(c)) {
                int num = map.get(c);
                map.replace(c, num, num + 1);
            } else {
                map.put(c, 1);
            }
        }
        List<EncryptNode> list = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : map.entrySet()) {
            Byte key = entry.getKey();
            Integer value = entry.getValue();
            list.add(new EncryptNode(key, value));
        }
        return list;
    }

    /**
     * 构建赫夫曼树
     *
     * @param list 权重集合
     * @return 返回root节点
     */
    public static EncryptNode buildHuffmanTree(List<EncryptNode> list) {
        while (list.size() > 1) {
            Collections.sort(list);
            EncryptNode leftNode = list.remove(0);
            EncryptNode rightNode = list.remove(0);
            EncryptNode parentNode = new EncryptNode(null, leftNode.value + rightNode.value);
            parentNode.left = leftNode;
            parentNode.right = rightNode;
            list.add(parentNode);
        }
        return list.remove(0);
    }

    /**
     * 重载getHuffmanCodes(EncryptNode node, String path,  StringBuilder stringBuilder)
     *
     * @param root 根节点
     * @return 哈夫曼编码map集合
     */
    public static Map<Byte, String> getHuffmanCodes(EncryptNode root) {
        if (root == null) {
            return null;
        } else {
            getHuffmanCodes(root.left, "0", stringBuilder);
            getHuffmanCodes(root.right, "1", stringBuilder);
        }
        return huffmanCodesMap;
    }

    /**
     * 赫夫曼编码
     *
     * @param node          结点
     * @param path          1代表右结点,0代表左结点路径
     * @param stringBuilder 用于path拼接
     */
    public static void getHuffmanCodes(EncryptNode node, String path, StringBuilder stringBuilder) {
        StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
        stringBuilder2.append(path);
        if (node != null) {
            if (node.c == null) {
                getHuffmanCodes(node.left, "0", stringBuilder2);
                getHuffmanCodes(node.right, "1", stringBuilder2);
            } else {
                huffmanCodesMap.put(node.c, stringBuilder2.toString());
            }
        }
    }

    /**
     * @param bytes 待编码字节数组
     * @param entry 赫夫曼编码集合
     * @return 返回编码后的byte数组
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> entry) {
        StringBuilder stringBuilder = new StringBuilder();
        for (byte ch : bytes) {
            stringBuilder.append(entry.get(ch));
        }
        //System.out.println("stringBuilder=" + stringBuilder.toString());
        int length = stringBuilder.length();
        int len = (length + 7) / 8;
        byte[] huffmanCodeBytes = new byte[len];
        int index = 0;
        for (int k = 0; k < length; k += 8) {
            String str;

            if ((k + 8) > length) {
                str = stringBuilder.substring(k);
            } else {
                str = stringBuilder.substring(k, k + 8);
            }
            huffmanCodeBytes[index] = (byte) Integer.parseInt(str, 2);
            index++;
        }
        return huffmanCodeBytes;
    }

    /**
     * 计算赫夫曼压缩率
     *
     * @param original 未压缩前的字节数组
     * @param newByte  压缩后的字节数组
     * @return 压缩率
     */
    public static String calHuffmanZipRatio(byte[] original, byte[] newByte) {
        double zipRatio = (double) (original.length - newByte.length) / original.length * 100;
        String s = String.format("%.2f", zipRatio);
        return s + "%";
    }

    //---------------------------------以上是压缩相关函数--------------------------------------------------------//
    //---------------------------------以下是解压缩相关函数-----------------------------------------------------//
    private static byte[] decode(Map<Byte, String> huffmanCodesMap, byte[] bytes) {
        StringBuilder stringBuilder = new StringBuilder();
        for (int k = 0; k < bytes.length; k++) {
            boolean flag = (k == bytes.length - 1);
            stringBuilder.append(byteToBinaryString(!flag, bytes[k]));
        }
        Map<String, Byte> reverseMap = new HashMap<>();
        for (Map.Entry<Byte, String> map : huffmanCodesMap.entrySet()) {
            reverseMap.put(map.getValue(), map.getKey());
        }
        List<Byte> list = new ArrayList<>();
        for (int i = 0; i < stringBuilder.length(); ) {
            int count = 1;
            Byte b = null;
            boolean flag = true;
            while (flag) {
                String key = stringBuilder.substring(i, i + count);
                b = reverseMap.get(key);
                if (b == null) {
                    count++;
                } else {
                    flag = false;
                }
            }
            list.add(b);
            i += count;
        }
        byte[] result = new byte[list.size()];
        for (int k = 0; k < result.length; k++) {
            result[k] = list.get(k);
        }
        return result;
    }

    /**
     * @param flag 如果为true, 表示需要进行高位补齐;如果是false,表示不需要进行高位补齐-->因为最后一位不一定刚好是满足八位
     * @param be   这是将转换为二进制字符串的字节
     * @return 返回二进制字符串
     */
    private static String byteToBinaryString(boolean flag, byte be) {
        int beInt = be;
        if (flag) {
            beInt |= 256;                                   //按位与  , 解决正数的高位补齐的问题
        }
        String s = Integer.toBinaryString(beInt);           //补码,正数需要高位补齐,负数需要截取
        if (flag) {
            return s.substring(s.length() - 8);
        } else {
            return s;
        }
    }

    //---------------------------------以上是解压缩相关函数-----------------------------------------------------//

    //---------------------------------以下是文件压缩解压相关函数-----------------------------------------------------//

    /**
     * 文件压缩方法  --- 赫夫曼编码压缩
     * @param readFile  压缩文件路径      文件 + 文件名
     * @param storeFile 压缩之后存储的路径  文件 + 文件名
     */
    private static void fileEncode(String readFile, String storeFile) {
        FileInputStream is = null;
        ObjectOutputStream oos = null;
        FileOutputStream os = null;
        try {
            is = new FileInputStream(readFile);
            byte[] bytes = new byte[is.available()];
            is.read(bytes);
            byte[] huffmanBytes = huffmanZip(bytes);
            os = new FileOutputStream(storeFile);
            oos = new ObjectOutputStream(os);
            oos.writeObject(huffmanBytes);
            oos.writeObject(huffmanCodesMap);
            os.write(huffmanBytes);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                oos.close();
                os.close();
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }

        }
    }
    public static void fileDecode(String zipFile, String dstFile) {
        System.out.println("文件解压开始----------------------");
        FileInputStream is = null;
        ObjectInputStream ois = null;
        FileOutputStream os = null;
        try {
            is = new FileInputStream(zipFile);
            ois = new ObjectInputStream(is);
            byte[] huffmanBytes = (byte[])ois.readObject();
            Map<Byte, String> huffmanCodes = (Map<Byte, String>) ois.readObject();
            byte[] bytes = decode(huffmanCodes, huffmanBytes);
            os = new FileOutputStream(dstFile);
            os.write(bytes);

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            try {
                os.close();
                ois.close();
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }

        }
    }
    //---------------------------------以上是文件压缩解压相关函数-----------------------------------------------------//

    //先序遍历赫夫曼树
    public static void preErgodic(EncryptNode cur) {
        if (cur == null) {
            return;
        }
        System.out.println(cur);
        preErgodic(cur.left);
        preErgodic(cur.right);
    }

    //打印list集合
    public static <T> void outputList(List<T> list) {
        for (T t : list) {
            System.out.println(t);
        }
    }
}

/**
 * 实现Comparable接口,便于使用Collections.sort排序
 */
class EncryptNode implements Comparable<EncryptNode> {
    public int value;   //出现的次数
    public Byte c;      //字符
    public EncryptNode left;
    public EncryptNode right;

    public EncryptNode(int value) {
        this.value = value;
    }

    public EncryptNode(以上是关于算法系列之使用赫夫曼编码的实战应用对文件进行压缩解压缩的主要内容,如果未能解决你的问题,请参考以下文章

项目实战——基于LZ77变形和哈夫曼编码的GZIP压缩

项目实战——基于LZ77变形和哈夫曼编码的GZIP压缩

Huffman编码实现压缩解压缩

哈夫曼编码(贪心算法)

算法赫夫曼编码 解码 实际应用 文件的编码 解码

霍夫曼树和霍夫曼编码以及霍夫曼编码的应用