Huffman算法也是一種無損壓縮算法,但與上篇文章LZW壓縮算法不同,Huffman需要得到每種字符出現概率的先驗知識。通過計算字符序列中每種字符出現的頻率,為每種字符進行唯一的編碼設計,使得頻率高的字符占的位數短,而頻率低的字符長,來達到壓縮的目的。通常可以節省20%~90%的空間,很大程度上依賴數據的特性!Huffman編碼是變長編碼,即每種字符對應的編碼長度不唯一。
前綴碼:任何一個字符的編碼都不是同一字符集中另一種字符編碼的前綴。Huffman編碼為最優前綴碼,即壓縮後數據量最小。
---------------------------------------------------------------------------------------------------------------
Huffman算法:
1.統計字符序列的每種字符的頻率,並為每種字符建立一個節點,節點權重為其頻率;
2.初始化一個最小優先隊列,把上述的結點全部插入到隊列中;
3.取出優先隊列中權重最小的兩個節點,並從優先隊列中刪除;
4.新建一個父節點,並把上述兩個節點作為其左右孩子節點,父節點的權值為左右節點之和;
5.如果此時優先隊列為空,則退出並返回父節點的指針!否則把父節點插入到優先隊列中,重複步驟3;
----------------------------------------------------------------------------------------------------------------
通過上述建造的Huffman樹,可以看到,每種字符結點都是葉子結點。編碼方法:從根節點開始,向左定義編碼'0',向右定義為'1',遍歷到葉子結點所得到的二值碼串,即為此種字符的編碼值。由於字符碼字為前綴碼,在譯碼過程中,每種字符可以參照Huffman樹被唯一地譯碼出來。但是前綴碼的缺點是錯誤會向後傳播:只要有1位碼字出錯,此後的譯碼過程很可能都不正確。

代碼實現:
/* CSDN 勿在浮沙築高台 http://blog.csdn.net/luoshixian099 數據壓縮--Huffman編碼 2015年12月21日 */ #include#include #include "compress.h" using namespace std; void ShowCode(PNode root, vector &code); int main() { char A[] = "xxznxznnvvccncvzzbzzvxxczbzvmnzvnnz";//原始數據 UINT Length = sizeof(A)-1; Priority_Q queue(A, Length); //建立優先隊列 //輸出每組字符的頻率 for (UINT i = 0; i <= queue.Heap_Size;i++) { cout << (char)(queue.table[i]->key) << " Frequency: " << queue.table[i]->Frequency << endl; } cout << "--------------------" << endl; PNode root = Build_Huffman_Tree(queue);//構建Huffman樹 vector code; ShowCode(root, code); //顯示編碼數據 return 0; } void ShowCode(PNode root,vector &code) { if (root!=NULL) { if (root->_left == NULL && root->_right == NULL) //葉子結點 { cout << (char)(root->key) << " code : " ; for (UINT i = 0; i < code.size() ; i++) { cout << (int)code[i]; } cout << endl; return; } code.push_back(0); ShowCode(root->_left,code); code[code.size()-1] = 1; ShowCode(root->_right,code); code.resize(code.size()-1); } }

/*
compress.cpp
*/
#include "compress.h"
/*
 * Builds the priority queue from the first Length chars of A.
 *
 * Every one of the 256 table slots is given a freshly allocated Node. Each
 * distinct symbol in A then claims the next free slot (in order of first
 * appearance) and its occurrences are counted in Frequency. Finally the used
 * slots are arranged as a min-heap keyed by Frequency.
 *
 * On return Heap_Size is the index of the LAST used slot (number of distinct
 * symbols minus one), or EMPTY when the input contains no symbols.
 *
 * A      : input buffer; a NULL pointer is treated as empty input.
 * Length : number of chars to read from A; values <= 0 yield an empty queue.
 */
Priority_Q::Priority_Q(char *A,int Length)
{
    for (int i = 0; i < 256; i++)
    {
        table[i] = new Node;
    }

    if (A == NULL)
    {
        Length = 0;
    }

    UINT distinct = 0; // number of distinct symbols seen so far
    for (int i = 0; i < Length; i++)
    {
        // Same char -> UINT conversion that is used when storing the key, so
        // equal input chars always compare equal to their stored key.
        const UINT symbol = A[i];

        // Linear search among the slots already claimed.
        UINT slot = 0;
        while (slot < distinct && table[slot]->key != symbol)
        {
            ++slot;
        }
        if (slot == distinct) // first occurrence: claim a new slot
        {
            table[slot]->key = symbol;
            ++distinct;
        }
        table[slot]->Frequency = table[slot]->Frequency + 1;
    }

    if (distinct == 0)
    {
        // Without this guard the count would wrap around on the decrement and
        // heap construction would index far outside table.
        Heap_Size = EMPTY;
        return;
    }

    Heap_Size = distinct - 1;  // convert count to "index of last element"
    Build_Min_Heap(Heap_Size); // establish the heap order
}
/*
 * Establishes the min-heap order by calling Min_Heapify on every index from
 * Length / 2 down to 0. The constructor passes the index of the last used
 * slot as Length.
 */
void Priority_Q::Build_Min_Heap(UINT Length)
{
    int node = static_cast<int>(Length / 2);
    while (node >= 0)
    {
        Min_Heapify(node);
        --node;
    }
}
/*
 * Sifts the entry at index i down until neither child has a smaller
 * Frequency. Children of index k live at 2k+1 and 2k+2; an index is part of
 * the heap when it is <= Heap_Size (Heap_Size is the last valid index).
 */
void Priority_Q::Min_Heapify(UINT i)
{
    UINT current = i;
    for (;;)
    {
        const UINT leftChild = 2 * current + 1;
        const UINT rightChild = leftChild + 1;

        // Pick the smallest of the node and its (existing) children.
        UINT smallest = current;
        if (leftChild <= Heap_Size && table[leftChild]->Frequency < table[smallest]->Frequency)
        {
            smallest = leftChild;
        }
        if (rightChild <= Heap_Size && table[rightChild]->Frequency < table[smallest]->Frequency)
        {
            smallest = rightChild;
        }

        if (smallest == current)
        {
            return; // heap order holds at this position
        }

        // Exchange with the smallest child and continue from its position.
        Swap(current, smallest);
        current = smallest;
    }
}
void Priority_Q::Swap(int x, int y) //交換兩個元素的數據
{
PNode temp = table[x];
table[x] = table[y];
table[y] = temp;
}
/*
 * Copies the contents of *_src into *_dst and returns _dst. The child
 * pointers are copied as-is (shallow copy), so both nodes refer to the same
 * subtrees afterwards. Neither pointer may be NULL.
 */
PNode CopyNode(PNode _src, PNode _dst)
{
    // Node consists solely of key, Frequency, _left and _right, so assigning
    // the whole struct copies exactly those four fields.
    *_dst = *_src;
    return _dst;
}
/*
 * Removes the entry with the smallest Frequency from the queue and returns a
 * pointer to it, or NULL when the queue is empty.
 *
 * The returned node stays in table, in the slot just past the new end of the
 * heap. That is the slot the next Insert() writes into, so callers should
 * copy the data out before inserting again.
 */
PNode Priority_Q::Extract_Min()
{
    if (Heap_Size == EMPTY)
    {
        return NULL;
    }

    const UINT last = Heap_Size; // index of the last heap entry
    if (last == 0)
    {
        // Only one entry left: the queue becomes empty.
        Heap_Size = EMPTY;
        return table[0];
    }

    // Move the minimum to the end, shrink the heap by one and repair the
    // heap order starting at the root.
    Swap(last, 0);
    Heap_Size = last - 1;
    Min_Heapify(0);
    return table[last];
}
/*
 * Inserts a node into the queue.
 *
 * The contents of *pnode are copied into the table slot just past the
 * current end of the heap, and pnode itself is deleted -- the caller must
 * not use it afterwards. Incrementing Heap_Size from EMPTY wraps to 0, so
 * inserting into an empty queue fills slot 0. The table has 256 slots; the
 * caller must not insert beyond that.
 */
void Priority_Q::Insert(PNode pnode)
{
    const UINT slot = ++Heap_Size;
    CopyNode(pnode, table[slot]);
    delete pnode;

    // Sift the new entry up while its parent has a larger Frequency.
    for (UINT child = slot; child > 0; )
    {
        const UINT up = Parent(child);
        if (!(table[up]->Frequency > table[child]->Frequency))
        {
            break;
        }
        Swap(child, up);
        child = up;
    }
}
/*
 * Builds the Huffman tree from the symbols held in queue and returns its
 * root; the queue is emptied in the process.
 *
 * Repeatedly extracts the two entries with the smallest Frequency, copies
 * them into newly allocated nodes, and joins them under a new parent whose
 * Frequency is their sum. The parent goes back into the queue until the
 * queue runs empty, at which point it is the root.
 *
 * Returns NULL when the queue is empty on entry. When the queue holds a
 * single symbol, the root is a parent with only a left child, so that the
 * symbol still receives a one-bit code ("0").
 *
 * The returned nodes are allocated with new and are owned by the caller.
 */
PNode Build_Huffman_Tree(Priority_Q &queue)
{
    PNode parent = NULL;
    while (queue.Heap_Size != EMPTY)
    {
        // Extracted pointers refer to slots the queue reuses on the next
        // Insert(), so each entry is copied into its own node right away.
        PNode left = new Node;
        CopyNode(queue.Extract_Min(), left);

        // The second extraction yields NULL only when the queue started out
        // with exactly one symbol.
        PNode right = NULL;
        PNode second = queue.Extract_Min();
        if (second != NULL)
        {
            right = new Node;
            CopyNode(second, right);
        }

        parent = new Node;
        parent->Frequency = left->Frequency + (right != NULL ? right->Frequency : 0);
        parent->_left = left;
        parent->_right = right;

        if (queue.Heap_Size == EMPTY)
        {
            break; // nothing left to merge with: parent is the root
        }

        queue.Insert(parent); // Insert() copies the node and deletes it
        parent = NULL;        // do not keep the dangling pointer around
    }
    return parent;
}
/* compress.h */
#ifndef COMPRESS
#define COMPRESS
#include <cstddef> // NULL

#define UINT unsigned int
#define UCHAR unsigned char
#define EMPTY 0xFFFFFFFF                 // "no key" marker for Node::key and "no entries" marker for Heap_Size
#define Parent(i) (UINT)(((i) - 1) / 2)  // index of the parent of heap index i (i > 0)

// Tree / queue node.
typedef struct Node
{
    Node():key(EMPTY), Frequency(0),_left(NULL),_right(NULL){}
    UINT key;              // symbol value; EMPTY until a symbol is assigned
    UINT Frequency;        // occurrence count, used as the node weight
    struct Node * _left;   // left child, NULL if none
    struct Node * _right;  // right child, NULL if none
}Node,*PNode;

// Min-priority queue of nodes ordered by Frequency, stored as a binary heap
// in table. The nodes allocated by the constructor are never freed by this
// class.
class Priority_Q
{
public:
    // Counts the symbols in A[0..Length) and builds the queue from them.
    Priority_Q(char *A, int Length);
    // Copies *pnode into the queue and deletes pnode.
    void Insert(PNode pnode);
    // Removes and returns the entry with the smallest Frequency (NULL if empty).
    PNode Extract_Min();
    UINT Heap_Size;    // index of the last entry in table, or EMPTY
    PNode table[256];  // heap storage, one slot per possible symbol
private:
    void Build_Min_Heap(UINT Length); // arrange table as a min-heap
    void Swap(int x, int y);          // exchange two table slots
    void Min_Heapify(UINT i);         // restore the heap order below index i
};

// Builds the Huffman tree from the queue and returns its root.
PNode Build_Huffman_Tree(Priority_Q &queue);
#endif // COMPRESS
// Reference:
http://wenku.baidu.com/view/04a8a13b580216fc700afd2e.html