程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> C語言 >> C++ >> C++入門知識 >> C++:UTF-8與GB2312之間的互換

C++:UTF-8與GB2312之間的互換

編輯:C++入門知識

話不多說,前幾天我就遇到了字符之間的麻煩,在網頁中出現了亂碼,為此我還寫了個百度經驗,呵呵。

現在就是解決這個問題的時候了。當然,用記事本自帶的“另存為”功能就可以簡單地完成編碼轉換,但是這裡要討論的是如何利用C++中的函數來實現轉換。

下面介紹一下WinAPI的兩個函數:WideCharToMultiByte、MultiByteToWideChar。

函數原型:

// WinAPI prototype, quoted here for reference.
int WideCharToMultiByte(
 UINT CodePage, // code page
 DWORD dwFlags, // performance and mapping flags
 LPCWSTR lpWideCharStr, // wide-character string
 int cchWideChar, // number of chars in string
 LPSTR lpMultiByteStr, // buffer for new string
 int cbMultiByte, // size of buffer
 LPCSTR lpDefaultChar, // default for unmappable chars
 LPBOOL lpUsedDefaultChar // set when default char used
); // converts a wide-character string into a multibyte (narrow) string

// WinAPI prototype, quoted here for reference.
int MultiByteToWideChar(
 UINT CodePage, // code page
 DWORD dwFlags, // character-type options
 LPCSTR lpMultiByteStr, // string to map
 int cbMultiByte, // number of bytes in string
 LPWSTR lpWideCharStr, // wide-character buffer
 int cchWideChar // size of buffer
);// converts a multibyte (narrow) string into a wide-character string
需要用到的一些函數:

// Converts one hexadecimal digit, given as a one-character string, into its
// 4-character binary representation (e.g. "a" -> "1010").
//
// Only the digits "0".."9" and lowercase "a".."f" are recognised; any other
// input (uppercase letters, longer strings, empty string) yields "".
CString CTest::HexToBin(CString string)
{
 // Parallel tables: kHexDigits[n] is the digit whose bit pattern is kBitPatterns[n].
 static const char* const kHexDigits[16] = {
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "a", "b", "c", "d", "e", "f"
 };
 static const char* const kBitPatterns[16] = {
  "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
  "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
 };

 for (int nibble = 0; nibble < 16; ++nibble)
 {
  if (string == kHexDigits[nibble])
  {
   return kBitPatterns[nibble];
  }
 }

 // Not a recognised hexadecimal digit.
 return "";
}

// Converts a 4-character binary string into the matching lowercase
// hexadecimal digit (e.g. "1010" -> "a").
//
// Input must be exactly one of the sixteen patterns "0000".."1111"; anything
// else yields "".
CString CTest::BinToHex(CString BinString)
{
 // Parallel tables: kBitPatterns[n] is the bit pattern of digit kHexDigits[n].
 static const char* const kBitPatterns[16] = {
  "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
  "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
 };
 static const char* const kHexDigits[16] = {
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "a", "b", "c", "d", "e", "f"
 };

 for (int nibble = 0; nibble < 16; ++nibble)
 {
  if (BinString == kBitPatterns[nibble])
  {
   return kHexDigits[nibble];
  }
 }

 // Not a recognised 4-bit pattern.
 return "";
}

// Converts a string of binary digits ('0' / '1', most significant digit
// first) into its integer value, e.g. "00001010" -> 10.
//
// The previous implementation weighted digit i by 2^(7-i), so it was only
// correct for strings of exactly 8 digits. This version accumulates the value
// positionally, which gives the same result for 8-digit input and the correct
// value for any other length.
//
// Parameters:
//   string - the binary digits; an empty string yields 0. Characters are not
//            validated: each contributes (character code - '0').
// Returns:
//   The numeric value of the digit string. Inputs longer than the bit width
//   of int wrap around rather than being reported as an error.
int CTest::BinToInt(CString string)
{
 // Accumulate as unsigned so that over-long input wraps instead of
 // triggering signed-overflow undefined behaviour.
 unsigned int value = 0;
 const int digitCount = string.GetLength();
 for (int i = 0; i < digitCount; ++i)
 {
  const int digit = static_cast<int>(string.GetAt(i)) - '0';
  // Shift the digits seen so far one binary place left, then append this one.
  value = value * 2u + static_cast<unsigned int>(digit);
 }
 return static_cast<int>(value);
}
UTF-8轉換成GB2312:先把UTF-8轉換成Unicode,然後再把Unicode通過函數WideCharToMultiByte轉換成GB2312。

// Decodes one 3-byte UTF-8 sequence (1110xxxx 10yyyyyy 10zzzzzz) into the
// 16-bit code unit xxxxyyyy yyzzzzzz.
//
// Parameters:
//   ustart - pointer to the first byte of the 3-byte sequence. The lead and
//            continuation marker bits are masked off, not validated.
// Returns:
//   A pointer to a NUL-terminated, one-character wide string holding the
//   decoded code unit, or NULL when ustart is NULL. If the input ends (a
//   zero byte) before three bytes are available, the result is an empty
//   string.
//
// The returned pointer refers to static storage inside this function: it
// stays valid after the call returns (the previous version returned the
// address of a local array, which dangled), but it is overwritten by the
// next call, so copy the value out before calling again.
WCHAR* CTest::UTF_8ToUnicode(char *ustart)
{
 static WCHAR unicode[2] = { 0, 0 };

 if (ustart == NULL)
 {
  return NULL;
 }

 unicode[0] = 0;

 // Do not read past a terminator: a 3-byte sequence never contains a zero byte.
 if (ustart[0] == '\0' || ustart[1] == '\0' || ustart[2] == '\0')
 {
  return unicode;
 }

 // Go through unsigned char so bytes >= 0x80 are not sign-extended.
 const unsigned int leadByte  = static_cast<unsigned char>(ustart[0]);
 const unsigned int midByte   = static_cast<unsigned char>(ustart[1]);
 const unsigned int trailByte = static_cast<unsigned char>(ustart[2]);

 // Keep the 4 payload bits of the lead byte and the 6 payload bits of each
 // continuation byte, then pack them as xxxx yyyyyy zzzzzz.
 const unsigned int codeUnit = ((leadByte  & 0x0Fu) << 12)
                             | ((midByte   & 0x3Fu) << 6)
                             |  (trailByte & 0x3Fu);

 unicode[0] = static_cast<WCHAR>(codeUnit);
 return unicode;
}

char * CTest::UnicodeToGB2312(unsigned short uData)  //把Unicode 轉換成 GB2312
{
 char *buffer ;
 buffer = new char[sizeof(WCHAR)];
 WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,sizeof(WCHAR),NULL,NULL);
 return buffer;
}
GB2312轉換成UTF-8:先把GB2312通過函數MultiByteToWideChar轉換成Unicode,然後再把Unicode拆開後拼裝成UTF-8。

作者“李木空間 ”

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved