LZ78 压缩算法的 Java 实现
Posted 简简单单OnlineZuozuo
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了LZ78 压缩算法的 Java 实现相关的知识,希望对你有一定的参考价值。
LZ78 压缩算法的 Java 实现
1、压缩算法的实现
通过多路搜索树提高检索速度
package com.wretchant.lz78;
import java.util.*;
/**
多路英文单词查找树
*/
class Trie {
private TrieNode root;
public Trie() {
root = new TrieNode();
root.wordEnd = false;
}
public void insert(String word) {
TrieNode node = root;
for (int i = 0; i < word.length(); i++) {
Character c = word.charAt(i);
if (!node.childdren.containsKey(c)) {
node.childdren.put(c, new TrieNode());
}
node = node.childdren.get(c);
}
node.wordEnd = true;
}
public boolean search(String word) {
TrieNode node = root;
for (int i = 0; i < word.length(); i++) {
Character c = word.charAt(i);
if (!node.childdren.containsKey(c)) {
return false;
}
node = node.childdren.get(c);
}
return node.wordEnd;
}
}
class TrieNode {
Map<Character, TrieNode> childdren;
boolean wordEnd;
public TrieNode() {
childdren = new HashMap<Character, TrieNode>();
wordEnd = false;
}
}
/**
编码表
*/
class Output {
private Integer index;
private Character character;
Output(Integer index, Character character) {
this.index = index;
this.character = character;
}
public Integer getIndex() {
return index;
}
public Character getCharacter() {
return character;
}
}
class LZencode {
@FunctionalInterface
interface Encode {
List<Output> encode(String message);
}
/**
构建多路搜索树
*/
static Trie buildTree(Set<String> keys) {
Trie trie = new Trie();
keys.forEach(trie::insert);
return trie;
}
public static final Encode ENCODE = message -> {
// 构建压缩后的编码表
List<Output> outputs = new ArrayList<>();
Map<String, Integer> treeDict = new HashMap<>();
int mLen = message.length();
int i = 0;
while (i < mLen) {
Set<String> keySet = treeDict.keySet();
// 生成多路搜索树
Trie trie = buildTree(keySet);
char messageI = message.charAt(i);
String messageIStr = String.valueOf(messageI);
// 使用多路树进行搜索
if (!trie.search(messageIStr)) {
outputs.add(new Output(0, messageI));
treeDict.put(messageIStr, treeDict.size() + 1);
i++;
} else if (i == mLen - 1) {
outputs.add(new Output(treeDict.get(messageIStr), ' '));
i++;
} else {
for (int j = i + 1; j < mLen; j++) {
String substring = message.substring(i, j + 1);
String str = message.substring(i, j);
// 使用多路树进行搜索
if (!trie.search(substring)) {
outputs.add(new Output(treeDict.get(str), message.charAt(j)));
treeDict.put(substring, treeDict.size() + 1);
i = j + 1;
break;
}
if (j == mLen - 1) {
outputs.add(new Output(treeDict.get(substring), ' '));
i = j + 1;
}
}
}
}
return outputs;
};
}
2、解压缩算法的实现
package com.wretchant.lz78;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class LZdecode {
@FunctionalInterface
interface Decode {
/**
@param outputs 编码表
@return 解码后的字符串
*/
String decode(List<Output> outputs);
}
/**
根据编码表进行解码
*/
public static final Decode DECODE = (List<Output> outputs) -> {
StringBuilder unpacked = new StringBuilder();
Map<Integer, String> treeDict = new HashMap<>();
for (Output output : outputs) {
Integer index = output.getIndex();
Character character = output.getCharacter();
if (index == 0) {
unpacked.append(character);
treeDict.put(treeDict.size() + 1, character.toString());
continue;
}
String term = "" + treeDict.get(index) + character;
unpacked.append(term);
treeDict.put(treeDict.size() + 1, term);
}
return unpacked.toString();
};
}
3、测试和使用
package com.wretchant.lz78;
import java.io.InputStream;
import java.util.List;
import java.util.Scanner;
import java.util.function.ToIntFunction;
public class LZpack {
public static final ToIntFunction<List<Output>> DICT_PRINT = outputs -> {
outputs.forEach(output -> {
System.out.println("index :" + output.getIndex() + " char :" + output.getCharacter());
});
return 1;
};
public static void main(String[] args) {
Scanner scanner = new Scanner(System.in);
System.out.println("Please input text ");
String input = scanner.nextLine();
LZencode.Encode encode = LZencode.ENCODE;
List<Output> outputs = encode.encode(input);
DICT_PRINT.applyAsInt(outputs);
}
}
测试结果如下
4、Python 版本的实现代码
def compress(message):
tree_dict, m_len, i = {}, len(message), 0
while i < m_len:
# case I
if message[i] not in tree_dict.keys():
yield (0, message[i])
tree_dict[message[i]] = len(tree_dict) + 1
i += 1
# case III
elif i == m_len - 1:
yield (tree_dict.get(message[i]), '')
i += 1
else:
for j in range(i + 1, m_len):
# case II
if message[i:j + 1] not in tree_dict.keys():
yield (tree_dict.get(message[i:j]), message[j])
tree_dict[message[i:j + 1]] = len(tree_dict) + 1
i = j + 1
break
# case III
elif j == m_len - 1:
yield (tree_dict.get(message[i:j + 1]), '')
i = j + 1
def uncompress(packed):
unpacked, tree_dict = '', {}
for index, ch in packed:
if index == 0:
unpacked += ch
tree_dict[len(tree_dict) + 1] = ch
else:
term = tree_dict.get(index) + ch
unpacked += term
tree_dict[len(tree_dict) + 1] = term
return unpacked
if __name__ == '__main__':
messages = ['ABBCBCABABCAABCAAB', 'BABAABRRRA', 'AAAAAAAAA']
for m in messages:
pack = compress(m)
unpack = uncompress(pack)
print(unpack == m)
以上是关于LZ78 压缩算法的 Java 实现的主要内容,如果未能解决你的问题,请参考以下文章