程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> C語言 >> VC >> 關於VC++ >> UTF-8與GB2312之間的互換

UTF-8與GB2312之間的互換

編輯:關於VC++

相信一定有不少的程序開發人員時常會遇到字符編碼的問題,而這個問題也是非常讓人頭痛的。因為這些都是潛在的錯誤,要找出這些錯誤也得要有這方面的開發經驗才行。特別是在處理xml文檔時,該問題的出現就更加的頻繁了。有一次用java寫服務器端程序,用vc寫客戶端與之交互,交互的協議都是用xml寫的,結果在通訊時老是發現數據接收不正確。納悶!於是用抓取網絡數據包工具抓取數據,後來才發現原來是java上xml的頭是這樣的:<?xml version="1.0" encoding="UTF-8"?>,而vc上默認的是GB2312,所以一遇到漢字數據就不正確了。去網上找資料,這方面的文章好像特別少。針對像這樣的問題,下面我介紹一下我自己寫的一個轉換程序。當然,程序很簡單,如果有畫蛇添足的地方,還望各位高手一笑了之。

如果您對UTF-8、Unicode、GB2312等還是很陌生的話,請先查看相關資料,我這裡就不浪費口舌了。下面介紹一下WinAPI的兩個函數:WideCharToMultiByte、MultiByteToWideChar。

函數原型:

// Prototype of the Win32 API function WideCharToMultiByte, quoted for reference.
int WideCharToMultiByte(
  UINT CodePage, // code page
  DWORD dwFlags, // performance and mapping flags
  LPCWSTR lpWideCharStr, // wide-character string
  int cchWideChar, // number of chars in string
  LPSTR lpMultiByteStr, // buffer for new string
  int cbMultiByte, // size of buffer
  LPCSTR lpDefaultChar, // default for unmappable chars
  LPBOOL lpUsedDefaultChar // set when default char used
); // converts a wide-character string into a multibyte (narrow-character) string
// Prototype of the Win32 API function MultiByteToWideChar, quoted for reference.
int MultiByteToWideChar(
  UINT CodePage, // code page
  DWORD dwFlags, // character-type options
  LPCSTR lpMultiByteStr, // string to map
  int cbMultiByte, // number of bytes in string
  LPWSTR lpWideCharStr, // wide-character buffer
  int cchWideChar // size of buffer
);// converts a multibyte (narrow-character) string into a wide-character string

需要用到的一些函數:

// Converts a single hexadecimal digit into its four-character binary text
// form, e.g. "a" -> "1010".
//
// string: exactly one hex digit, "0"-"9", "a"-"f" or "A"-"F".
// Returns the four '0'/'1' characters for that digit, or an empty string when
// the argument is not a single hex digit.
//
// Upper-case digits are accepted in addition to the lower-case ones; the
// previous implementation silently returned "" for "A"-"F".
CString CXmlProcess::HexToBin(CString string)
{
  static const char* const kLowerDigits[16] = {
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", "a", "b", "c", "d", "e", "f"
  };
  static const char* const kUpperDigits[16] = {
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", "A", "B", "C", "D", "E", "F"
  };
  static const char* const kNibbleBits[16] = {
    "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
    "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
  };

  // The table index is the numeric value of the digit.
  for (int value = 0; value < 16; ++value)
  {
    if (string == kLowerDigits[value] || string == kUpperDigits[value])
    {
      return kNibbleBits[value];
    }
  }
  return "";
}
// Converts a four-character binary text group into one lower-case hexadecimal
// digit, e.g. "1010" -> "a".
//
// BinString: four characters, each '0' or '1'.
// Returns the matching hex digit as a one-character string, or an empty
// string when BinString is not one of the sixteen four-bit patterns.
CString CXmlProcess::BinToHex(CString BinString)
{
  static const char* const kNibblePatterns[16] = {
    "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
    "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
  };
  static const char* const kHexDigits[16] = {
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", "a", "b", "c", "d", "e", "f"
  };

  // The table index is the numeric value of the bit pattern.
  for (int value = 0; value < 16; ++value)
  {
    if (BinString == kNibblePatterns[value])
    {
      return kHexDigits[value];
    }
  }
  return "";
}
// Converts binary text (a string of '0'/'1' characters, most significant bit
// first) into its integer value, e.g. "00000101" -> 5.
//
// string: the binary digits; an empty string yields 0.
// Returns the value of the digits read as a base-2 number.
//
// The previous implementation hard-coded the weight of position i as
// 2^(7 - i), so it was only correct for exactly eight digits: shorter input
// was treated as left-aligned ("1" -> 128) and every digit after the eighth
// was given weight 1. Positional accumulation gives the same result for
// eight-digit input and the correct result for every other length.
// Characters are not validated; each contributes (character - '0'), as before.
// The result is an int, so input longer than the bit width of int overflows.
int CXmlProcess::BinToInt(CString string)
{
  int value = 0;
  const int digitCount = string.GetLength();
  for (int i = 0; i < digitCount; ++i)
  {
    // Shift what has been read so far one bit left and append the next digit.
    value = value * 2 + (static_cast<int>(string.GetAt(i)) - '0');
  }
  return value;
}

UTF-8轉換成GB2312:先把UTF-8轉換成Unicode,然後再把Unicode通過函數WideCharToMultiByte轉換成GB2312。

// Decodes one three-byte UTF-8 sequence into a single UTF-16 code unit.
//
// ustart: pointer to the first byte of the sequence; the three bytes
//         ustart[0..2] are read. Only the three-byte form
//         (1110xxxx 10yyyyyy 10zzzzzz) is handled; the marker bits are
//         masked off without being validated.
// Returns a pointer to the decoded character, or NULL when ustart is NULL.
//
// The result lives in function-level static storage: it stays valid after the
// call returns but is overwritten by the next call, so callers should copy
// the value out immediately. The function is therefore not reentrant and not
// safe to call from several threads at once. The caller must NOT delete the
// returned pointer.
//
// The previous implementation returned a pointer into a local char[2] array,
// i.e. a dangling pointer to dead stack memory (and one that was not
// guaranteed to be aligned for WCHAR).
WCHAR* CXmlProcess::UTF_8ToUnicode(char *ustart) // converts UTF-8 to Unicode
{
  static WCHAR decoded = 0;

  if (ustart == NULL)
  {
    return NULL;
  }

  // Go through unsigned char so bytes >= 0x80 are not sign-extended.
  const unsigned int lead = static_cast<unsigned char>(ustart[0]);
  const unsigned int middle = static_cast<unsigned char>(ustart[1]);
  const unsigned int tail = static_cast<unsigned char>(ustart[2]);

  // 1110xxxx 10yyyyyy 10zzzzzz  ->  xxxxyyyy yyzzzzzz
  decoded = static_cast<WCHAR>(((lead & 0x0F) << 12) |
                               ((middle & 0x3F) << 6) |
                               (tail & 0x3F));
  return &decoded;
}
char * CXmlProcess::UnicodeToGB2312(unsigned short uData) //把Unicode 轉換成 GB2312
{
  char *buffer ;
  buffer = new char[sizeof(WCHAR)];
  WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,sizeof (WCHAR),NULL,NULL);
  return buffer;
}

 

GB2312轉換成UTF-8:先把GB2312通過函數MultiByteToWideChar轉換成Unicode.然後再把 Unicode通過拆開Unicode後拼裝成UTF-8。WCHAR * CXmlProcess::Gb2312ToUnicode (char *gbBuffer) //GB2312 轉換成 Unicode
{
  WCHAR *uniChar;
  uniChar = new WCHAR[1];
  ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,uniChar,1);
  return uniChar;
}
// Encodes one UTF-16 code unit as a three-byte UTF-8 sequence
// (1110xxxx 10yyyyyy 10zzzzzz).
//
// UniChar: pointer to the character to encode; NULL is encoded as U+FFFD.
// Returns a buffer allocated with new[] that the caller must release with
// delete[]. Bytes 0..2 hold the sequence and byte 3 is a terminating '\0'.
//
// The three-byte form is always produced, as before, because callers copy
// exactly three bytes. For values below U+0800 this is an over-long encoding
// that strict UTF-8 decoders reject, and surrogate code units are encoded
// individually.
//
// The previous version built the bytes from "%x"-formatted text. A high or
// low byte below 0x10 formats to a single hex digit, which left the bit
// string too short and produced wrong output (for example for U+4E00).
// Bit operations handle every value uniformly.
char * CXmlProcess::UnicodeToUTF_8(WCHAR *UniChar) // converts Unicode to UTF-8
{
  unsigned int code = 0xFFFD;
  if (UniChar != NULL)
  {
    code = static_cast<unsigned int>(*UniChar) & 0xFFFFu;
  }

  char *buffer = new char[4];
  buffer[0] = static_cast<char>(0xE0 | (code >> 12));          // top 4 bits
  buffer[1] = static_cast<char>(0x80 | ((code >> 6) & 0x3F));  // middle 6 bits
  buffer[2] = static_cast<char>(0x80 | (code & 0x3F));         // low 6 bits
  buffer[3] = '\0';
  return buffer;
}

例子:將GB2312轉換成UTF-8的調用:char * CXmlProcess::translateCharToUTF_8(char *xmlStream, int len)
{
  int newCharLen =0 ;
  int oldCharLen = 0;
  int revCharLen = len;
  char* newCharBuffer;
  char* finalCharBuffer;
  char *buffer ;
  CString string;
  buffer = new char[sizeof(WCHAR)];
  newCharBuffer = new char[int(1.5*revCharLen)];//設置最大的一個緩沖區
  while(oldCharLen < revCharLen)
  {
    if( *(xmlStream + oldCharLen) >= 0)
    {
      *(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);
      newCharLen ++;
      oldCharLen ++;
    }//如果是英文直接復制就可以
    else
    {
      WCHAR *pbuffer = this->Gb2312ToUnicode(xmlStream+oldCharLen);
      buffer = this->UnicodeToUTF_8(pbuffer);
      *(newCharBuffer+newCharLen) = *buffer;
      *(newCharBuffer +newCharLen +1) = *(buffer + 1);
      *(newCharBuffer +newCharLen +2) = *(buffer + 2);
      newCharLen += 3;
      oldCharLen += 2;
    }
  }
  newCharBuffer[newCharLen] = ''\'';
  CString string1 ;
  string1.Format("%s",newCharBuffer);
  finalCharBuffer = new char[newCharLen+1];
  memcpy(finalCharBuffer,newCharBuffer,newCharLen+1);
  return finalCharBuffer;
}

程序都非常的簡單,由於實在太窮,已經吃了兩天的方便面,所以現在頭昏,程序的詳細說明就不寫了。程序員到了像我這樣的地步也真是少見。工資低沒有辦法。哎!!!!

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved