#include "encoding.h" #include /* * Read from a input stream until EOF and return the text's frequency table, i.e. * how many times each character was found in the text. */ map buildFrequencyTable(istream& input) { map freq_table; while (true) { int c = input.get(); if (input.eof()) { break; } if (!freq_table.count(c)) { freq_table[c] = 0; } freq_table[c] += 1; } freq_table[PSEUDO_EOF] = 1; return freq_table; } /* * Build and return a Huffman tree from a frequency table. * * Remember to free the tree when you're done! */ HuffmanNode* buildEncodingTree(const map& freq_table) { std::priority_queue nodes; for (const auto& freq_element : freq_table) { nodes.push(HuffmanNode(freq_element.first, freq_element.second)); } while (nodes.size() > 1) { HuffmanNode* node_left = new HuffmanNode(nodes.top()); nodes.pop(); HuffmanNode* node_right = new HuffmanNode(nodes.top()); nodes.pop(); HuffmanNode merged_node = HuffmanNode(); merged_node.count = node_left->count + node_right->count; merged_node.zero = node_left; merged_node.one = node_right; nodes.push(merged_node); } HuffmanNode* root = new HuffmanNode(nodes.top()); return root; } /* * Merge 'from' into 'to', overwriting if a key already exists. */ template void merge_map(map& into, const map& from) { for (const auto& element : from) { into[element.first] = element.second; } } map encoding_map_helper(HuffmanNode *encoding_tree, const string& cur_string) { map encoding_map; if (encoding_tree) { if (encoding_tree->isLeaf()) { encoding_map[encoding_tree->character] = cur_string; } else { encoding_map = encoding_map_helper(encoding_tree->zero, cur_string + "0"); merge_map(encoding_map, encoding_map_helper(encoding_tree->one, cur_string + "1")); } } return encoding_map; } /* * Build a Huffman encoding map (char -> bit-string) from a Huffman tree. */ map buildEncodingMap(HuffmanNode* encoding_tree) { return encoding_map_helper(encoding_tree, ""); } /* * Write a bit-string as bits to an obitstream. */ void encode_char(int c, const map& encoding_map, obitstream& output) { for (const auto& bit : encoding_map.at(c)) { output.writeBit(bit == '0' ? 0 : 1); } } /* * Write 'input' Huffman-compressed to 'output' encoded with 'encoding_map'. */ void encodeData(istream& input, const map& encoding_map, obitstream& output) { while (true) { int c = input.get(); if (input.eof()) { break; } encode_char(c, encoding_map, output); } encode_char(PSEUDO_EOF, encoding_map, output); } /* * Read a single character from the Huffman-compressed 'input' encded with * 'encoding_map'. */ int read_char(ibitstream& input, HuffmanNode* encoding_tree) { if (encoding_tree->isLeaf()) { return encoding_tree->character; } else { int bit = input.readBit(); if (bit == 0) { return read_char(input, encoding_tree->zero); } else { return read_char(input, encoding_tree->one); } } } /* * Read the Huffman-compressed 'input' encoded with 'encoding_tree' and write * it to 'output'. */ void decodeData(ibitstream& input, HuffmanNode* encoding_tree, ostream& output) { while (true) { int next_char = read_char(input, encoding_tree); if (next_char == PSEUDO_EOF) { break; } char c = (char)next_char; output.write(&c, 1); } } /* * Write 'freq_table' (char -> frequency) as a Huffman-header to 'output'. */ void encodeHeader(const map& freq_table, ostream& output) { output.write("{", 1); int i = 0; for (const auto& encode : freq_table) { string char_num_str = to_string(encode.first); string char_freq_str = to_string(encode.second); output.write(char_num_str.c_str(), char_num_str.length()); output.write(":", 1); output.write(char_freq_str.c_str(), char_freq_str.length()); if (i != freq_table.size() - 1) { output.write(", ", 2); } i++; } output.write("}", 1); } /* * Huffman-compress 'input' and write it and its header to 'output'. */ void compress(istream& input, obitstream& output) { map freq_table = buildFrequencyTable(input); HuffmanNode* encoding_tree = buildEncodingTree(freq_table); map encoding_map = buildEncodingMap(encoding_tree); encodeHeader(freq_table, output); input.clear(); input.seekg(0, ios::beg); encodeData(input, encoding_map, output); freeTree(encoding_tree); } /* * Read an integer from 'input'. The char immediately following the integer * is also read and is stored in *next_char, if it's set. */ int read_int(istream& input, int* next_char = nullptr) { string str; while (true) { int c = input.get(); if (c < '0' || c > '9') { if (next_char) { *next_char = c; } return stoi(str); } else { str = str + (char)c; } } } /* * Read a Huffman-header from 'input'. When done, 'input' will point to * the first non-header byte. */ map decodeHeader(istream& input) { map freq_table; if (input.get() != '{') { cerr << "Broken huffman header, expected '{' as first byte" << endl; return freq_table; } while (true) { int next_input, next_char_val, next_char_freq; next_char_val = read_int(input); next_char_freq = read_int(input, &next_input); freq_table[next_char_val] = next_char_freq; if (next_input == '}') { break; } input.get(); // read space, ',' was read by read_int } return freq_table; } /* * Read the Huffman-compressed 'input' (header included) and write it to * 'output'. */ void decompress(ibitstream& input, ostream& output) { map freq_table = decodeHeader(input); HuffmanNode* encoding_tree = buildEncodingTree(freq_table); decodeData(input, encoding_tree, output); freeTree(encoding_tree); } /* * Completely free a Huffman tree. */ void freeTree(HuffmanNode* node) { if (node) { if (!node->isLeaf()) { freeTree(node->zero); freeTree(node->one); } delete node; } else { cerr << "Tried to free null tree" << endl; } }