這是我的第一篇博客,希望大神們批評指正。
首先介紹以下什麼是哈夫曼樹(來自百度百科)
哈夫曼樹─即最優二叉樹,帶權路徑長度最小的二叉樹,經常應用於數據壓縮。 在計算機信息處理中,“哈夫曼編碼”是一種一致性編碼法(又稱“熵編碼法”),用於數據的無損耗壓縮。這一術語是指使用一張特殊的編碼表將源字符(例如某文件中的一個符號)進行編碼。這張編碼表的特殊之處在於,它是根據每一個源字符出現的估算概率而建立起來的(出現概率高的字符使用較短的編碼,反之出現概率低的則使用較長的編碼,這便使編碼之後的字符串的平均期望長度降低,從而達到無損壓縮數據的目的)。
構造哈夫曼樹的主要思想:
構造哈夫曼樹非常簡單,將所有的節點放到一個隊列中,用一個節點替換兩個頻率最低的節點,新節點的頻率就是這兩個節點的頻率之和。這樣,新節點就是兩個被替換節點的父節點了。如此循環,直到隊列中只剩一個節點(樹根)。
這裡用到了最小優先隊列。
我這裡用STL來實現。(這裡有優先隊列的介紹)
template<typename T>
struct cmp
{
bool operator()(TreeNode<T>* t1, TreeNode<T>* t2)
{
return !(*t1 < *t2);
}
};
優先隊列的定義:
priority_queue<TreeNode*,vector<TreeNode* >,cmp > pri_que;
哈夫曼樹節點結構
template<typename T>
class TreeNode
{
public:
TreeNode():pfather(NULL),plchild(NULL),prchild(NULL)
{
}
T data;
TreeNode *pfather;
TreeNode *plchild;
TreeNode *prchild;
bool operator < (const TreeNode& rhs)
{
return data < rhs.data;
}
};
構造哈夫曼樹
每次從最小優先隊列取頭兩個節點,合並後放回最小優先隊列,如此重復。
template<typename T>
TreeNode<T>* MakeHuffmanTree(T* begin, T* end) //構造哈夫曼樹
{
priority_queue<TreeNode<T>*,vector<TreeNode<T>* >,cmp<T> > pri_que;
T *iter = begin;
TreeNode<T>* pNode;
TreeNode<T>* pf = NULL;
while(iter != end)
{
pNode = new TreeNode<T>;
pNode->data = *iter++;
pNode->pfather = pf;
pri_que.push(pNode);
}
TreeNode<T>* plchild;
TreeNode<T>* prchild;
while(pri_que.size() > 1)//取兩個小的合並
{
plchild = pri_que.top();
pri_que.pop();
prchild = pri_que.top();
pri_que.pop();
pNode = new TreeNode<T>;
pNode->plchild = plchild;
pNode->prchild = prchild;
pNode->data = plchild->data + prchild->data;
pri_que.push(pNode);
}
pNode = pri_que.top();
pri_que.pop();
return pNode;
}
構造哈夫曼樹這個函數的參數是一個結構體,保存著對應字符,其頻率,編碼值。
重載它的+運算符,為了合並時的+運算(只是頻率相加)。
到此為止,已經可以把哈夫曼樹生成出來了。
template<typename T>
struct mydata
{
mydata(){}
mydata(int i):freq(i)
{
}
string coded;
int freq;
T data;
bool operator<(const mydata& rhs)
{
return freq < rhs.freq;
}
mydata operator+(mydata& rhs)
{
return mydata(freq + rhs.freq);
}
};
我們可以通過DFS將每個葉子節點的路徑記錄下來(用一個全局變量數組path),然後得到它的編碼。
當發現當前節點是葉子節點,就把當前的路徑賦值至該葉子節點的編碼屬性(coded)。
const int MAXLEN = 20;
char path[MAXLEN] = {0};
template<typename T>
void DFS(T* root,int deep = -1, char a = '-') //DFS 得到葉子節點的編碼
{
if(root == NULL)
return;
if(a != '-')
path[deep] = a;
if(root->plchild == NULL || root->prchild == NULL)//leaf
(root->data).coded = string(path,path + deep + 1);
if(root->plchild != NULL)
DFS(root->plchild, deep + 1, '0');
if(root->prchild != NULL)
DFS(root->prchild, deep + 1, '1');
}
這樣整個哈夫曼編碼工作已經完成,為了查看我們的編碼結果,我們可以用BFS跟DFS來看到我們的結果。在這裡我選擇了BFS。
當遍歷到葉子節點,就將其編碼屬性(coded)和其對應字符輸出。
template<typename T,typename U>
void BFS(T* root, mydata<U>* data) //BFS 將葉子節點的編碼,提到data指向的數據
{
queue<T*> que;
que.push(root);
T* pT = NULL;
while(!que.empty())
{
pT = que.front();
//cout<<pT->data.freq<<endl;
que.pop();
if(pT->plchild != NULL)
que.push(pT->plchild);
if(pT->prchild != NULL)
que.push(pT->prchild);
if(pT->plchild == NULL || pT->prchild == NULL)// leaf 提取葉子節點的編碼
{
//cout<<(pT->data).data<<":"<<(pT->data).coded<<endl;
mydata<U>* pd = data;
while((pT->data).data != pd->data)
pd++;
assert(pd->data == (pT->data).data);
pd->coded = (pT->data).coded;
}
}
}
測試驅動代碼
mydata<char> *pdata = new mydata<char>[4];
pdata[0].data = 'a';
pdata[0].freq = 7;
pdata[1].data = 'b';
pdata[1].freq = 5;
pdata[2].data = 'c';
pdata[2].freq = 2;
pdata[3].data = 'd';
pdata[3].freq = 4;
TreeNode<mydata<char> >* pihuffmanTree = MakeHuffmanTree(pdata, pdata + 4);
DFS(pihuffmanTree);
BFS(pihuffmanTree);
為了更方便的使用我將這些封裝到一個類裡面。
template<typename T>
class Huffman
{
public:
void Coded(string& coded);//傳入待輸出的編碼
void DeCode(const string& codedstr,string& decodestr);//輸入待解碼字符串,輸出解碼字符串
void InputData(T* begin,T* end);//傳入數據
private:
string FindVal(char c);
void m_CalcFreq(T* begin, T* end);//計算輸入數據的頻率
TreeNode<mydata<T> > *root;//huffman根節點
mydata<T>* data;
int data_size;
T* m_begin;//保存原始數據的開始與結束的位置
T* m_end;
//string codedstr;
};
輸入數據並計算其頻率。
用map容器來統計輸入字符每個出現的個數。
template<typename T>
void Huffman<T>::InputData(T* begin, T* end)
{
this->m_begin = begin;
this->m_end = end;
m_CalcFreq(begin, end);
}
template<typename T>
void Huffman<T>::m_CalcFreq(T* begin, T* end)
{
int len = end - begin;
//data_size = len;
if(len == 0)
return;
map<T,int> countMap;
map<T,int>::iterator mapIter = countMap.begin();
T *pT = begin;
while(pT != end)
{
mapIter = countMap.find(*pT);
if(mapIter != countMap.end())//在map裡有沒有字符*iter
++mapIter->second;
else
{
countMap.insert(make_pair(*pT,1));
}
pT++;
}
data_size = countMap.size();
data = new mydata<T>[data_size];
int i = 0;
for (mapIter = countMap.begin(); mapIter != countMap.end(); ++mapIter)
{
data[i].data = mapIter->first;
data[i].freq = mapIter->second;
i++;
}
}
編碼
template<typename T>
void Huffman<T>::Coded(string& coded)
{
root = MakeHuffmanTree(data,data + data_size);
DFS(root);
BFS(root,data);
cout<<"code:"<<endl;
for (int i = 0; i < data_size; ++i)
{
cout<<data[i].data<<":"<<data[i].coded<<endl;
}
T *begin = m_begin;
while (begin != m_end)
{
coded += FindVal(*begin);
begin++;
}
//string subcode =
}
解碼
template<typename T>
void Huffman<T>::DeCode(const string& codedstr,string& decodestr)
{
string::const_iterator iter = codedstr.begin();
TreeNode<mydata<T> >* curNode = root;
while (iter != codedstr.end())
{
if (curNode->plchild == NULL || curNode->prchild == NULL)
{
decodestr += (curNode->data).data;
curNode = root;
continue;
}
if (*iter == '0')
curNode = curNode->plchild;
if(*iter == '1')
curNode = curNode->prchild;
iter++;
}
}
測試驅動程序
char *pmystr = "cbcddddbbbbaaaaaaa";
Huffman<char> h;
h.InputData(pmystr, pmystr + 18);
cout<<"originstr: "<<pmystr<<endl;
string coded;
h.Coded(coded);
cout<<"coded: "<<coded<<endl;
string decode;
h.DeCode(coded,decode);
cout<<"decode: "<<decode<<endl;
完整程序(環境:VS2012)
#include <iostream>
//#include <algorithm>
#include <queue>
#include <string>
#include <vector>
#include <cassert>
#include <map>
using namespace std;
template<typename T>
class TreeNode
{
public:
TreeNode():pfather(NULL),plchild(NULL),prchild(NULL)
{
}
T data;
TreeNode *pfather;
TreeNode *plchild;
TreeNode *prchild;
bool operator < (const TreeNode& rhs)
{
return data < rhs.data;
}
};
template<typename T>
struct cmp
{
bool operator()(TreeNode<T>* t1, TreeNode<T>* t2)
{
return !(*t1 < *t2);
}
};
template<typename T>
TreeNode<T>* MakeHuffmanTree(T* begin, T* end) //構造哈夫曼樹
{
priority_queue<TreeNode<T>*,vector<TreeNode<T>* >,cmp<T> > pri_que;
T *iter = begin;
TreeNode<T>* pNode;
TreeNode<T>* pf = NULL;
while(iter != end)
{
pNode = new TreeNode<T>;
pNode->data = *iter++;
pNode->pfather = pf;
pri_que.push(pNode);
}
TreeNode<T>* plchild;
TreeNode<T>* prchild;
while(pri_que.size() > 1)//取兩個小的合並
{
//cout<<static_cast<TreeNode<T>* >(pri_que.top())->data<<endl;
//pri_que.pop();
plchild = pri_que.top();
pri_que.pop();
prchild = pri_que.top();
pri_que.pop();
pNode = new TreeNode<T>;
pNode->plchild = plchild;
pNode->prchild = prchild;
pNode->data = plchild->data + prchild->data;
pri_que.push(pNode);
}
pNode = pri_que.top();
pri_que.pop();
return pNode;
}
template<typename T>
struct mydata
{
mydata(){}
mydata(int i):freq(i)
{
}
string coded;
int freq;
T data;
bool operator<(const mydata& rhs)
{
return freq < rhs.freq;
}
mydata operator+(mydata& rhs)
{
return mydata(freq + rhs.freq);
}
};
template<typename T,typename U>
void BFS(T* root, mydata<U>* data) //BFS 將葉子節點的編碼,提到data指向的數據
{
queue<T*> que;
que.push(root);
T* pT = NULL;
while(!que.empty())
{
pT = que.front();
//cout<<pT->data.freq<<endl;
que.pop();
if(pT->plchild != NULL)
que.push(pT->plchild);
if(pT->prchild != NULL)
que.push(pT->prchild);
if(pT->plchild == NULL || pT->prchild == NULL)// leaf 提取葉子節點的編碼
{
//cout<<(pT->data).data<<":"<<(pT->data).coded<<endl;
mydata<U>* pd = data;
while((pT->data).data != pd->data)
pd++;
assert(pd->data == (pT->data).data);
pd->coded = (pT->data).coded;
}
}
}
const int MAXLEN = 20;
char path[MAXLEN] = {0};
template<typename T>
void DFS(T* root,int deep = -1, char a = '-') //DFS 得到葉子節點的編碼
{
if(root == NULL)
return;
if(a != '-')
path[deep] = a;
if(root->plchild == NULL || root->prchild == NULL)//leaf
(root->data).coded = string(path,path + deep + 1);
if(root->plchild != NULL)
DFS(root->plchild, deep + 1, '0');
if(root->prchild != NULL)
DFS(root->prchild, deep + 1, '1');
}
template<typename T>
class Huffman
{
public:
void Coded(string& coded);
void DeCode(const string& codedstr,string& decodestr);
void InputData(T* begin,T* end);
private:
string FindVal(char c);
void m_CalcFreq(T* begin, T* end);
TreeNode<mydata<T> > *root;
mydata<T>* data;
int data_size;
T* m_begin;
T* m_end;
//string codedstr;
};
template<typename T>
void Huffman<T>::InputData(T* begin, T* end)
{
this->m_begin = begin;
this->m_end = end;
m_CalcFreq(begin, end);
}
template<typename T>
void Huffman<T>::m_CalcFreq(T* begin, T* end)
{
int len = end - begin;
//data_size = len;
if(len == 0)
return;
map<T,int> countMap;
map<T,int>::iterator mapIter = countMap.begin();
T *pT = begin;
while(pT != end)
{
mapIter = countMap.find(*pT);
if(mapIter != countMap.end())
++mapIter->second;
else
{
countMap.insert(make_pair(*pT,1));
}
pT++;
}
data_size = countMap.size();
data = new mydata<T>[data_size];
int i = 0;
for (mapIter = countMap.begin(); mapIter != countMap.end(); ++mapIter)
{
data[i].data = mapIter->first;
data[i].freq = mapIter->second;
i++;
}
}
template<typename T>
void Huffman<T>::Coded(string& coded)
{
root = MakeHuffmanTree(data,data + data_size);
DFS(root);
BFS(root,data);
cout<<"code:"<<endl;
for (int i = 0; i < data_size; ++i)
{
cout<<data[i].data<<":"<<data[i].coded<<endl;
}
T *begin = m_begin;
while (begin != m_end)
{
coded += FindVal(*begin);
begin++;
}
//string subcode =
}
template<typename T>
void Huffman<T>::DeCode(const string& codedstr,string& decodestr)
{
string::const_iterator iter = codedstr.begin();
TreeNode<mydata<T> >* curNode = root;
while (iter != codedstr.end())
{
if (curNode->plchild == NULL || curNode->prchild == NULL)
{
decodestr += (curNode->data).data;
curNode = root;
continue;
}
if (*iter == '0')
curNode = curNode->plchild;
if(*iter == '1')
curNode = curNode->prchild;
iter++;
}
}
template<typename T>
string Huffman<T>::FindVal(char c)
{
for (int i = 0; i < data_size ; ++i)
{
if (c != data[i].data)
continue;
return data[i].coded;
}
return string();
}
int main()
{
//mydata<char> *pdata = new mydata<char>[4];
//pdata[0].data = 'a';
//pdata[0].freq = 7;
//pdata[1].data = 'b';
//pdata[1].freq = 5;
//pdata[2].data = 'c';
//pdata[2].freq = 2;
//pdata[3].data = 'd';
//pdata[3].freq = 4;
////int a[12]={14,10,56,7,83,22,36,91,3,47,72,0};
//TreeNode<mydata<char> >* pihuffmanTree = MakeHuffmanTree(pdata, pdata + 4);
//DFS(pihuffmanTree);
//BFS(pihuffmanTree);
//string str = "cbcddddbbbbaaaaaaa";
char *pmystr = "cbcddddbbbbaaaaaaa";
Huffman<char> h;
h.InputData(pmystr, pmystr + 18);
cout<<"originstr: "<<pmystr<<endl;
string coded;
h.Coded(coded);
cout<<"coded: "<<coded<<endl;
string decode;
h.DeCode(coded,decode);
cout<<"decode: "<<decode<<endl;
return 0;
}