-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHuffmanEncoding.h
143 lines (109 loc) · 3.23 KB
/
HuffmanEncoding.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/*
Class that provide huffman encoding precise estimation of compression of data.
Without considering the additional overload added by meta-compression-info.
the encoding schema is based on the frequencies of the symbols of the alphabet.
This method is lossless compression.
The steps are:
-Count frequencies of symbols
-Build a tree based on frequencies that define the new encoding for each symbol
Then optionally:
-print stats on compression
-save the compressed file (not implemented but pretty trivial)
Bortoli Tomas in 2013
*/
#ifndef HUFFMAN_ENCODING_H
#define HUFFMAN_ENCODING_H
#define DEBUG
#include <iostream>
using namespace std;
#include "HuffmanTree.h"
#include "MinPriorityQueue.h"
#define BITS_IN_A_BYTE 8
class HuffmanEncoding{
private:
unsigned int frequencies[256];
HuffmanTree *t;
public:
HuffmanEncoding(){
t=NULL;
for(int i=0;i<256;i++)
frequencies[i]=0;
}
void countFrequencies(unsigned char* data, int len){
for(int i=0;i<len;i++){
frequencies[data[i]]++;
}
#ifdef DEBUG
printf("Frequencies:\n");
for(int i=0;i<256;i++){
if(frequencies[i]!=0){
printf("%c:%d\n",(unsigned char)i,frequencies[i] );
}
}
printf("\n\n");
#endif
}
void createEncoding(){
//Count how many symbols are present, and alloc the necessary space.
int len=0;
for(int i=0;i<256;i++){
if(frequencies[i]!=0)
len++;
}
Node** leafs=new Node*[len];
//Initialize the min priority queue
MinPriorityQueue *q=new MinPriorityQueue();
//Initialize the leaf nodes and inserts it in the queue
int j=0;
for(int i=0;i<256;i++){
if(frequencies[i]!=0){
leafs[j]=new Node(frequencies[i]);
leafs[j]->data=new unsigned char;
*(leafs[j]->data)=i;
q->addElem(leafs[j]);
//printf("%c\n",*(leafs[j]->data) );
j++;
}
}
//q->printQueue();
//Create the tree, extracting the 2 min priority nodes and sum their values to create a new node, put this node into the queue
//Repeat this until the queue will be empty
Node *val1,*val2;
while(!q->lastOne()){
//DEBUG
//q->printQueue(); cin.ignore();
val1=q->removeElem();
val2=q->removeElem();
Node* newNode= new Node(val1->v+val2->v);
val1->p=val2->p=newNode;
newNode->r=val1; newNode->l=val2;
q->addElem(newNode);
}
t=new HuffmanTree(q->removeElem());
#ifdef DEBUG
printf("Encoding:\n");
t->printEncoding();
#endif
delete q;
//Compute and print gain from the initial size, to the final size.
computeGain();
}
void computeGain(){
if(t==NULL) return;
long int initialSize=0;
long int finalSize=0;
for(int i=0;i<256;i++)
initialSize+=frequencies[i]*BITS_IN_A_BYTE;
for(int i=0;i<256;i++)
finalSize+=frequencies[i]*t->bits_needed[i];
#ifdef DEBUG
printf("\n\n");
printf("Outcome of huffman compression:\n");
printf("\tInitial weight of file: %d bytes\n",initialSize/BITS_IN_A_BYTE );
printf("\tWeight after Huffman Compression: %d bytes\n",finalSize/BITS_IN_A_BYTE );
printf("\tThe final file scale %f%% respect to the initial file; without calculating compression meta-info weight\n",((float)finalSize)/((float)initialSize)*100.0f );
printf("\n\n");
#endif
}
};
#endif