Contact Shane Zentz

Shane Zentz implementation of Huffmans Algorithm

/******************************************
* Student: Shane Zentz *
* Class: C455 *
* Insrtructor: Dr. Santean *
* Date: 3.29.2009 *
* File: huffman.cc *
* This file implements the huffman *
* algorithm for finding the minimum ABL *
* of a set of symbols. The set is *
* limited to all lowercase letters *
* (english alphabet) + the space charater *
* for this implementation. The classes *
* list, node, and huffmanTree are used. *
******************************************/

#include
#include
#include "list.h"
#include "node.h"
using namespace std;

int main(int argc, char *argv[] )
{
// If user did not enter a filename, exit.
if ( argc != 2 )
{
cout << "Try: "<< argv[0] <<" filename \n";
return 1;
}

// Otherwise try to open the file for reading.
else {
ifstream file ( argv[1] );

if ( !file.is_open() )
cout << "Could not open file\n";

else{

// Create the list and hard code the symbols into it.

list mainList;
mainList.insert(' ', 0.00, 0);
mainList.insert('z', 0.00, 0);
mainList.insert('y', 0.00, 0);
mainList.insert('x', 0.00, 0);
mainList.insert('w', 0.00, 0);
mainList.insert('v', 0.00, 0);
mainList.insert('u', 0.00, 0);
mainList.insert('t', 0.00, 0);
mainList.insert('s', 0.00, 0);
mainList.insert('r', 0.00, 0);
mainList.insert('q', 0.00, 0);
mainList.insert('p', 0.00, 0);
mainList.insert('o', 0.00, 0);
mainList.insert('n', 0.00, 0);
mainList.insert('m', 0.00, 0);
mainList.insert('l', 0.00, 0);
mainList.insert('k', 0.00, 0);
mainList.insert('j', 0.00, 0);
mainList.insert('i', 0.00, 0);
mainList.insert('h', 0.00, 0);
mainList.insert('g', 0.00, 0);
mainList.insert('f', 0.00, 0);
mainList.insert('e', 0.00, 0);
mainList.insert('d', 0.00, 0);
mainList.insert('c', 0.00, 0);
mainList.insert('b', 0.00, 0);
mainList.insert('a', 0.00, 0);

// print empty list..for testing.
//mainList.print();

char x;

while ( file.get ( x ) )
{
//cout << x;
mainList.count(x);
}

// Close the file.
file.close();

mainList.calculateFrequency();

mainList.buildTree();

mainList.print();

cout << endl;
cout << "Average Bit Length: " << mainList.ABL() << endl;
cout << "Compression factor: " << mainList.compression() <<
endl << endl;
}

}

}


/*****************************************
* Shane Zentz *
* 3.29.2009 *
* list.h *
* this file defines a class to create a *
* list of nodes and perform basic *
* operations on that list. *
* Methods are included to insert a node *
* onto the front of the list, count the *
* total number of symbols in the list, *
* calculate the frequency of a symbol *
* relative to the total number of symbols*
* and other methods needed to implement *
* the Huffman minimum ABL algorithm. *
*****************************************/
#include "node.h"
#include
using namespace std;

class list
{
protected:
node_ptr head, tail;
int size;

public:
list();

~list();

void insert(char letter, float freq, int count);

void print();

void count(char x);

int getTotal();

void calculateFrequency();

node_ptr smallest();

void buildTree();

bool isEmpty();

float compression();

float ABL();
};


/*********************************************
* Student: Shane Zentz *
* Class: C455 *
* Instructor: Dr. Santean *
* Date: 3.29.2009 *
* File: node.h *
* This file is the defenition of a class *
* which will be a node that holds data used *
* in the implementation of Huffman's minimum *
* ABL algorithm. *
* Each node will contain fields for a char *
* the count of the character, the frequancy *
* of the character, the huffman binary *
* code of the character as well a boolean *
* variable that is used/needed by the list *
* wrapper class. *
*********************************************/
#ifndef NODE_H
#define NODE_H

#include
using namespace std; // needed for string var.

class node
{
private:

int count; // Total occurences of symbol in text.
float frequency; // Frequency of symbol.
char letter; // The symbol itself.
string code; // The huffman min ABL binary code.
bool checked; // Bool used by the list class.
node *next; // Pointer to next node in list.

public:

// a constructor.
node(char lt);

// another constructor
node(char symbol, float freq, int sum, node *link);

// a destructor.
~node();

// Accessor for letter.
char getChar();

// Accessor for freuency.
float getFrequency();

// Accessor for code
string getCode();

// Accessor for count
int getCount();

// Mutator for frequency
void setFrequency(float freq);

// Mutator for checked bool var
void setChecked(bool b);

// Mutator for binary code member variable.
void setCode(string cd);

// Mutator for count
void setCount(int number);

// Mutator for count member variable.
int incrementCount();

friend class list;

};

typedef node *node_ptr;

#endif


/********************************************
* Shane Zentz *
* 3.29.2009 *
* huffman.h *
* This file defines the huffmanTree class, *
* which implements the Huffman algorithm *
* for the minimum ABL of a set of symbols. *
* Various methods are included to create *
* the Huffman tree and to return the binary *
* code for a symbol from the resulting tree.*
********************************************/
#ifndef HUFFMANTREE_H
#define HUFFMANTREE_H

// namespace needed for string return type.
#include
using namespace std;

class huffmanTree
{
private:

struct treeNode
{
float freq;
char symbol;
treeNode *left;
treeNode *right;
};
treeNode* root;

public:

// Constructor
huffmanTree();

// Destructor
~huffmanTree();

bool isEmpty();

void insert(float key, char x);

string returnCode(char x);

// Just a test function. Ignore.
void printer();

};

#endif


/****************************************
* Shane Zentz *
* 3.29.2009 *
* list.cc *
* This file implements the class *
* defined in list.h which creates a list*
* of nodes and performs various *
* operations on the list needed for *
* implementing Huffmans minimum ABL *
* algorithm.
****************************************/
#include
#include
#include "list.h"
#include "node.h"
#include "huffmanTree.h"

using namespace std;

// create a list of nodes with all letters predefined.
list::list()
: head(NULL), tail(NULL), size(0){}

// Clean up after use.
list::~list(){}

// Insert a node into the front of the list.
void list::insert(char letter, float freq,
int count)
{
node_ptr new_node = new node(letter, freq, count, head);
size++;
head = new_node;

if (size == 1)
tail = new_node;
}

// Print selected contents of each node in the list.
void list::print()
{
node_ptr temp = head;
cout << endl;
cout << "Symbol" << "\t" << "frequency" <<
"\t" << "Code" << endl << endl;
while(temp)
{
cout << temp->letter << "\t" << temp->frequency <<
"\t\t" << temp->code << endl;
temp = temp->next;
}
}

// Mutator for the nodes count variable
// which keeps track of the total
// occurences of a symbol in the text.
void list::count(char x)
{
node_ptr temp = head;
while(temp)
{
if (temp->letter == x)
{
temp->setCount(temp->getCount() + 1);
//cout << "found node" << endl;
break;
}
else
temp = temp->next;
}
}

// Method to count the total number
// of symbols in the list (which
// have been processed from a
// given text).
int list::getTotal()
{
int sum = 0;
node_ptr temp = head;
while(temp)
{
sum = sum + temp->getCount();
temp = temp->next;
}
return sum;
}


// Method to find the frequency of a
// symbol relative to the total
// number of symbols in a given text.
void list::calculateFrequency()
{
int total = getTotal();
//cout << "total: " << total << endl;
node_ptr temp = head;
while(temp)
{
temp->setFrequency(static_cast(temp->getCount())/total);
temp = temp->next;
}
}


// Method to find the smallest frequency in the list.
// Once the smallest frequency is found, its nodes
// checked bool variable is set to true so that it is
// not selected as the smallest again.
node_ptr list::smallest()
{
node_ptr junk = new node('X', 0.00, 0, NULL);
node_ptr temp = head;
node_ptr send = NULL;
float min = temp->frequency + 100;
while (temp && ! isEmpty())
{
if(temp->getFrequency() < min && temp->checked == false)
{
min = temp->frequency;
send = temp;
}
temp = temp->next;
}

if (send == NULL) // reached end of list (processed whole list).
return junk;
else
{
send->setChecked(true);
return send;
}
}

// Method to find out if list is empty.
bool list::isEmpty(){ return head == NULL; }


// Compression method to calculate the
// compression factor of input text.
float list::compression()
{
float encodedText = 0.00;
float inputText = 0.00;
float lengthCount = 0.00;
node_ptr temp = head;
string s;
inputText = getTotal() * 8; // Total symbols * 8 bits.

while (temp)
{
s = temp->code;
lengthCount = s.length() * temp->count;
encodedText += lengthCount;

temp = temp->next;
}

return (encodedText/inputText);
}

// ABL method to calculate the
// average bit length of the
// encoding.
float list::ABL()
{
float sum = 0.00;
node_ptr temp = head;
string s;

while(temp)
{
s = temp->code;
sum += temp->frequency * s.length();

temp = temp->next;
}
return sum;
}

// This method assumes that the list is
// built and complete. Each node in the
// list is sent to the huffman class
// to build the huffman tree. The nodes
// are sent in ascending order.
// Then the resulting binary code (in string form)
// is added to each node in the list.
void list::buildTree()
{
// first create a huffman object then
// send elements to the tree one by one.
// in sorted order.
huffmanTree hf;
node_ptr small = head;

while(small->letter != 'X')
{
small = smallest();
if (small->letter != 'X')
{
//cout << "sending smallest: " << small->frequency << endl;
hf.insert(small->frequency, small->letter);
}

}

//hf.printer();

node_ptr temp = head;
while(temp)
{
//cout << "Code for " << temp->letter << " is " <<
// hf.returnCode(temp->letter) << endl;
temp->setCode(hf.returnCode(temp->letter));
temp = temp->next;
}

}


/*****************************************
* Student: Shane Zentz *
* Class: C455 *
* Instructor: Dr. Santean *
* Date: 3.29.2009 *
* File: node.cc *
* This file implements the node class, *
* where each node holds information about*
* a symbol found in a text. These nodes *
* are used in the implementation of *
* Huffmans minimum ABL algorithm. Not all*
* accessors/mutators are needed / used. *
*****************************************/
#include
#include "node.h"
using namespace std;

// Constructor (not used in this implementation).
node::node(char lt)
: count(0) , frequency(0), code(0)
{ letter = lt; }

// 5 argument constructor.
node::node(char lt, float freq, int sum, node *link)
{
letter = lt;
code = "X";
frequency = freq;
count = sum;
next = link;
checked = false;
}

//Destructor
node::~node(){}

// Accessor for character.
char node::getChar() { return letter; }

// Accessor for frequency.
float node::getFrequency() { return frequency; }

// accessor for count
int node::getCount() { return count; }

// Accessor for code
string node::getCode() { return code; }

// mutator for count
void node::setCount(int number) { count = number; }

// Mutator for checked bool var.
void node::setChecked(bool b) { checked = b; }

// Mutator for binary code member variable.
void node::setCode(string cd) { code = cd; }

// Mutator for frequency
void node::setFrequency(float freq) { frequency = freq; }

// Mutator for count variable.
int node::incrementCount() { return count++; }


/***************************************
* Student: Shane Zentz *
* Class: C455 *
* Instructor: Dr. Santean *
* Date: 3.29.2009 *
* File: huffman.cc *
* Description: This file implements *
* the huffman class which*
* creates a huffman tree *
* for the binary encoding*
* of a set of symbols. *
* Methods are implemented*
* to build the huffman *
* tree and then to *
* retrieve the binary *
* code for any given *
* symbol. *
***************************************/
#include
#include "huffmanTree.h"
#include "list.h"
using namespace std;


// Constructor for empty tree.
huffmanTree::huffmanTree() { root=NULL; }

// Destructor
huffmanTree::~huffmanTree(){}

// Method to find out if tree is empty or not.
bool huffmanTree::isEmpty() { return root == NULL; }

// test method to print
void huffmanTree::printer()
{
treeNode* p = root;
// just print left part of tree?
while(p)
{
cout << "p->symbol: = " << p->symbol << endl;
p = p->left;
}
}

// Method to return the binary code that
// results from construction of the tree.
string huffmanTree::returnCode(char symbol)
{
string code = "";
treeNode* p = root;
while(p && p->symbol != symbol)
{
if(p->right->symbol == symbol)
{
code+="1";
break;
}
if(p->left->symbol == symbol)
{
code +="0";
break;
}
if (p->left->symbol != symbol)
code +="0";

p = p->left;
}
return code;
}


// HuffmanTree method builds the huffman Tree.
// Creates a new node for the tree which includes
// the symbol and its frequency. The node is then
// added to the tree in a position which is based
// on how the tree is constructed.
void huffmanTree::insert(float freq, char x)
{
// first create a new node for the tree
treeNode* t = new treeNode;
treeNode* parent;
t->freq = freq;
t->symbol = x;
t->left = NULL;
t->right = NULL;
parent = root;


// CASE 1 //
// if empty tree first node will be left child of parent
if (isEmpty())
{
root = new treeNode;
root->left = t;
root->freq = 0;
root->symbol = 'X';
root->right = NULL;
}

// CASE 2 //
// Only one node (left child of root.).
else if (!isEmpty() && !root->right && root->left)
{
root->right = t;
root->freq = parent->left->freq + t->freq;
//root->symbol = 'X';
}

// CASE 3 //
// The root exists with two children (and the children may
// have children), then insert node as the right child
// of a new root and set old root to left child of new root.
else
{
treeNode* newRoot = new treeNode;

newRoot->right = t;
newRoot->left = root;
newRoot->freq = root->freq + t->freq;
newRoot->symbol = 'X';
//newRoot->left = root;
root = newRoot;
}
}



/*****************************************
* Student: Shane Zentz *
* Class: C455 *
* Instructor: Dr. Santean *
* Date: 3.29.2009 *
* File: node.cc *
* This file implements the node class, *
* where each node holds information about*
* a symbol found in a text. These nodes *
* are used in the implementation of *
* Huffmans minimum ABL algorithm. Not all*
* accessors/mutators are needed / used. *
*****************************************/
#include
#include "node.h"
using namespace std;

// Constructor (not used in this implementation).
node::node(char lt)
: count(0) , frequency(0), code(0)
{ letter = lt; }

// 5 argument constructor.
node::node(char lt, float freq, int sum, node *link)
{
letter = lt;
code = "X";
frequency = freq;
count = sum;
next = link;
checked = false;
}

//Destructor
node::~node(){}

// Accessor for character.
char node::getChar() { return letter; }

// Accessor for frequency.
float node::getFrequency() { return frequency; }

// accessor for count
int node::getCount() { return count; }

// Accessor for code
string node::getCode() { return code; }

// mutator for count
void node::setCount(int number) { count = number; }

// Mutator for checked bool var.
void node::setChecked(bool b) { checked = b; }

// Mutator for binary code member variable.
void node::setCode(string cd) { code = cd; }

// Mutator for frequency
void node::setFrequency(float freq) { frequency = freq; }

// Mutator for count variable.
int node::incrementCount() { return count++; }




/******************************************************************
EXAMPLE OF RUNNING PROGRAM:

to start: C:\>test data.dat

input file (data.dat) :

a a a a a
bbbbbbbbbb
ccccccccccccccc
dddddddddddddddddddd
eeeeeeeeeeeeeeeeeeeeeeeee
ffffffffffffffffffffffffffffff
ggggggggggggggggggggggggggggggggggg
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
llllllllllllllllllllllllllllllllllllllllllllllllllllllllllll
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
oooooooooooooooooooooooooooooooooooooooooooooooooo
ppppppppppppppppppppppppppppppppppppppppppppp
qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
ssssssssssssssssssssssssssssss
ttttttttttttttttttttttttt
uuuuuuuuuuuuuuuuuuuu
vvvvvvvvvvvvvvv
wwwwwwwwww
xxxxx
yyyyy
z


Output:

[cs08 ~/C455/huffman/latest]$ g++ huffman.cc list.cc node.cc
huffmanTree.cc -o huff
[cs08 ~/C455/huffman/latest]$ huff data.dat

Symbol frequency Code

a 0.00632911 0000000000000000000000001
b 0.0126582 0000000000000000000001
c 0.0189873 00000000000000000001
d 0.0253165 000000000000000001
e 0.0316456 0000000000000001
f 0.0379747 00000000000001
g 0.0443038 000000000001
h 0.0506329 0000000001
i 0.056962 00000001
j 0.0632911 000001
k 0.0696203 0001
l 0.0759494 01
m 0.0759494 1
n 0.0696203 001
o 0.0632911 00001
p 0.056962 0000001
q 0.0506329 000000001
r 0.0443038 00000000001
s 0.0379747 0000000000001
t 0.0316456 000000000000001
u 0.0253165 00000000000000001
v 0.0189873 0000000000000000001
w 0.0126582 000000000000000000001
x 0.00632911 000000000000000000000001
y 0.00632911 00000000000000000000001
z 0.00126582 00000000000000000000000000
0.00506329 00000000000000000000000001

Average Bit Length: 9.0443
Compression factor: 1.13054

[cs08 ~/C455/huffman/latest]$



*******************************************************************/