哈夫曼编码器
用python简单写了一个哈夫曼编码/解码器,有以下功能:
1.编码,对字母进行编码,目前只支持英文字母
2.解码,对输入的序列进行解码
3.生成哈夫曼树图片
4.将编码结果写入文件中保存
要用到的第三方库有:
random:生成随机数
string:用于处理字符
tkinter:用于构建GUI
代码由三个类构成:哈夫曼树(HuffmanTree)类,节点(HuffmanNode)类,以及构建GUI的HufmanApp类。
HuffmanNode类
首先用__init__方法初始化节点
HuffmanTree类
__init__方法:初始化节点,并构造哈夫曼树(每次选择权重做大的两个节点进行合并)
select方法:用于选择节点
print_codes方法:打印每个字符的编码结果
decode方法:对输入的字符串进行解码
print_tree:绘制出建立的哈夫曼树
HuffmanApp类
构建一个Huffman解码器的界面,主要使用tkinter库来添加按钮和窗口
运行结果
简易的窗口
保存的哈夫曼树图片
编码结果保存在文件中
完整代码
安装好相应的第三方库后就能直接运行
from tkinter import simpledialog, scrolledtext
import tkinter as tk
from graphviz import Digraph
import string
import random
# 哈夫曼树节点类
class HuffmanNode:
# 构造函数
def __init__(self, weight=0, ch='', code='', lchild=-1, rchild=-1, parent=-1):
# weight表示节点的权重
self.weight = weight
# ch表示节点代表的字符
self.ch = ch
# 字符对应的哈夫曼编码
self.code = code
# lchild表示左孩子节点
self.lchild = lchild
# rchild表示右孩子节点
self.rchild = rchild
# 父节点
self.parent = parent
# 定义哈夫曼树类
class HuffmanTree:
def __init__(self, weights, chars):
# 先初始化节点
self.nodes = [HuffmanNode(weights[i], chars[i]) for i in range(len(weights))]
n = len(weights)
self.nodes += [HuffmanNode() for _ in range(n - 1)] # 2n - 1 nodes
for k in range(n, 2 * n - 1):
# 每次选择权重最小的两个节点
i1, i2 = self.select(k)
self.nodes[i1].parent = k
self.nodes[i2].parent = k
# 合并节点
self.nodes[k].weight = self.nodes[i1].weight + self.nodes[i2].weight
self.nodes[k].lchild = i1
self.nodes[k].rchild = i2
# 用于选择节点的函数
def select(self, n):
a, b = -1, -1
min_weight_a = float('inf')
for i in range(n):
if self.nodes[i].parent == -1 and self.nodes[i].weight < min_weight_a:
min_weight_a = self.nodes[i].weight
a = i
min_weight_b = float('inf')
for i in range(n):
if self.nodes[i].parent == -1 and i != a and self.nodes[i].weight < min_weight_b:
min_weight_b = self.nodes[i].weight
b = i
return a, b
# 显示编码结果
def print_codes(self):
for i in range((len(self.nodes) + 1) // 2):
code = ""
j = i
while self.nodes[j].parent != -1:
k = self.nodes[j].parent
if self.nodes[k].lchild == j:
code = "0" + code
else:
code = "1" + code
j = self.nodes[j].parent
self.nodes[i].code = code
print(f"字符 {self.nodes[i].ch} 的编码:{code}")
# 解码函数
def decode(self, encoded_string):
decoded_string = ""
temp_code = ""
for bit in encoded_string:
temp_code += bit
for node in self.nodes:
if node.code == temp_code:
decoded_string += node.ch
temp_code = ""
break
return decoded_string
# 绘制哈夫曼树
def print_tree(self):
dot = Digraph(comment='Huffman Tree')
# 使用字典来映射节点索引到graphviz中的节点ID,以便处理可能的重复索引
node_ids = {}
def add_node(index, label):
if index not in node_ids:
node_id = f"node_{index}"
dot.node(node_id, label)
node_ids[index] = node_id
return node_ids[index]
# 添加所有节点
for i, node in enumerate(self.nodes):
label = f"{node.ch}({node.weight})" if node.ch else f"({node.weight})"
node_id = add_node(i, label)
# 添加边
for i, node in enumerate(self.nodes):
if node.lchild != -1:
dot.edge(add_node(i, ""), add_node(node.lchild, ""))
if node.rchild != -1:
dot.edge(add_node(i, ""), add_node(node.rchild, ""))
# 随机生成一个十位的数字字符串作为保存图片的名字
letters = string.ascii_letters + string.digits # 包含所有字母和数字
random_string = ''.join(random.choice(letters) for i in range(10))
dot.render(random_string, format='png', cleanup=True)
print("哈夫曼树图片已经保存为" + random_string + ".png")
# 构造图形化界面
class HuffmanApp:
def __init__(self, root):
self.root = root
# 界面的标题
self.root.title("哈夫曼编码解码器")
self.frame_input = tk.Frame(root)
self.frame_input.pack(pady=20)
# 输入字符数量的输入框
self.label_n = tk.Label(self.frame_input, text="请输入英文字符个数(最多26个):")
self.label_n.pack(side=tk.LEFT, padx=(0, 20))
self.entry_n = tk.Entry(self.frame_input, width=20)
self.entry_n.pack(side=tk.LEFT, expand=True, fill=tk.X)
# 初始化按钮
self.button_init = tk.Button(self.frame_input, text="初始化", command=self.initialize)
self.button_init.pack(side=tk.RIGHT, padx=(20, 0))
# 创建一个文本框
self.text_codes = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=100, height=30)
self.text_codes.pack(pady=20)
# 创建一个框架
self.frame_decode = tk.Frame(root)
self.frame_decode.pack(pady=20)
# 解码按钮和输入框
self.label_decode = tk.Label(self.frame_decode, text="输入编码进行解码:")
self.label_decode.pack(side=tk.LEFT, padx=(0, 20))
self.entry_decode = tk.Entry(self.frame_decode, width=60)
self.entry_decode.pack(side=tk.LEFT, expand=True, fill=tk.X)
self.button_decode = tk.Button(self.frame_decode, text="解码", command=self.decode_text)
self.button_decode.pack(side=tk.RIGHT, padx=(20, 0))
# 打印按钮
self.button_print_tree = tk.Button(self.frame_input, text="打印哈夫曼树", command=self.print_and_notify_tree)
self.button_print_tree.pack(side=tk.RIGHT, padx=(40, 0))
self.huff_tree = None
def print_and_notify_tree(self):
if self.huff_tree:
self.huff_tree.print_tree()
tk.messagebox.showinfo("图片已保存")
else:
tk.messagebox.showerror("错误", "请先初始化哈夫曼树!")
def initialize(self):
# 初始化GUI
try:
n = int(self.entry_n.get())
chars = simpledialog.askstring("输入字符", "请输入英文字符(用逗号分隔):", parent=self.root)
if not chars:
return
chars = chars.split(',')
weights = list(map(float, simpledialog.askstring("输入频度", "请输入每个字符的频度(用逗号分隔):",
parent=self.root).split(',')))
if len(chars) != len(weights) or len(chars) != n:
raise ValueError("字符数、频度数和输入的数量必须一致!")
else:
with open("code.txt", "w") as outfile:
outfile.write("字符 频度\n")
for ch, weight in zip(chars, weights):
outfile.write(f"{ch} {weight}\n")
tk.messagebox.showinfo("数据已经写入")
self.huff_tree = HuffmanTree(weights, chars)
self.huff_tree.print_codes()
self.update_codes_text()
except ValueError as e:
self.text_codes.delete(1.0, tk.END)
self.text_codes.insert(tk.END, f"错误: {e}\n")
def update_codes_text(self):
self.text_codes.delete(1.0, tk.END)
for i in range(len(self.huff_tree.nodes) // 2 + 1):
self.text_codes.insert(tk.END, f"字符 {self.huff_tree.nodes[i].ch} 的编码:{self.huff_tree.nodes[i].code}\n")
# 解码函数
def decode_text(self):
if not self.huff_tree:
self.text_codes.insert(tk.END, "请先初始化哈夫曼树!\n")
return
encoded_string = self.entry_decode.get()
decoded_string = self.huff_tree.decode(encoded_string)
self.text_codes.insert(tk.END, f"解码后为:{decoded_string}\n")
# 打印哈夫曼树
def print_tree(self):
if self.huff_tree:
self.huff_tree.print_tree()
# 主函数
def main():
root = tk.Tk()
app = HuffmanApp(root)
root.mainloop()
# 调用主函数
if __name__ == "__main__":
main()
缺陷
- 界面还很粗糙没有优化
- 编码结果只能保存在名为code.txt的文件中,当前文件夹存在code.txt文件时写入格式可能会有些错误
- 只支持对英文字母编码
本作品采用 知识共享署名-相同方式共享 4.0 国际许可协议 进行许可。
很强
😂