1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
| import heapq import os
class HuffmanNode: def __init__(self, char, freq): self.char = char self.freq = freq self.left = None self.right = None
def __lt__(self, other): return self.freq < other.freq
def build_huffman_tree(frequencies): heap = [HuffmanNode(char, freq) for char, freq in frequencies.items()] heapq.heapify(heap)
while len(heap) > 1: left = heapq.heappop(heap) right = heapq.heappop(heap) merged = HuffmanNode(None, left.freq + right.freq) merged.left = left merged.right = right heapq.heappush(heap, merged)
return heap[0]
def build_huffman_codes(node, current_code, huffman_codes): if node is None: return
if node.char is not None: huffman_codes[node.char] = current_code return
build_huffman_codes(node.left, current_code + '0', huffman_codes) build_huffman_codes(node.right, current_code + '1', huffman_codes)
def compress(input_file, output_file): with open(input_file, 'rb') as f: data = f.read()
frequencies = {} for byte in data: if byte not in frequencies: frequencies[byte] = 0 frequencies[byte] += 1
root = build_huffman_tree(frequencies) huffman_codes = {} build_huffman_codes(root, '', huffman_codes)
compressed_data = '' for byte in data: compressed_data += huffman_codes[byte]
padding = 8 - len(compressed_data) % 8 compressed_data += '0' * padding
with open(output_file, 'wb') as f: f.write(bytes([len(frequencies)])) for byte, freq in frequencies.items(): f.write(bytes([byte, (freq >> 24) & 0xFF, (freq >> 16) & 0xFF, (freq >> 8) & 0xFF, freq & 0xFF]))
for i in range(0, len(compressed_data), 8): byte = compressed_data[i:i+8] f.write(bytes([int(byte, 2)]))
def build_huffman_tree_from_bytes(frequencies): heap = [HuffmanNode(byte, freq) for byte, freq in frequencies.items()] heapq.heapify(heap)
while len(heap) > 1: left = heapq.heappop(heap) right = heapq.heappop(heap) merged = HuffmanNode(None, left.freq + right.freq) merged.left = left merged.right = right heapq.heappush(heap, merged)
return heap[0]
def decompress(input_file, output_file): with open(input_file, 'rb') as f: frequencies_count = ord(f.read(1)) frequencies = {}
for _ in range(frequencies_count): byte = ord(f.read(1)) freq = (ord(f.read(1)) << 24) + (ord(f.read(1)) << 16) + (ord(f.read(1)) << 8) + ord(f.read(1)) frequencies[byte] = freq
root = build_huffman_tree_from_bytes(frequencies)
huffman_codes = {} build_huffman_codes(root, '', huffman_codes) reverse_huffman_codes = {v: k for k, v in huffman_codes.items()}
decompressed_data = bytearray() code = '' while True: byte = f.read(1) if len(byte) == 0: break
byte = ord(byte) for i in range(7, -1, -1): if byte & (1 << i): code += '1' else: code += '0'
if code in reverse_huffman_codes: decompressed_data.append(reverse_huffman_codes[code]) code = ''
with open(output_file, 'wb') as f: f.write(decompressed_data)
if __name__ == "__main__": compressed_file = 'compressed.bin' decompressed_file = 'decompressed.txt'
decompress(compressed_file, decompressed_file)
|