C#如何解析HTTP報文。本站提示廣大學習愛好者:《C#如何解析HTTP報文》一文僅供參考,不一定能成為您想要的結果。以下是《C#如何解析HTTP報文》正文:
下面通過一段內容(既有文字說明,也有代碼分析),並附上展示圖,供大家學習。
要解析HTTP報文,需要完成以下操作:
讀取HTTP報頭提供的各種屬性
分析屬性值,從中獲取內容編碼和字符集編碼
將報頭數據和內容進行分離
判斷內容是文本還是二進制,如果是二進制的則不進行處理
如果內容是文本,按報頭中提供的內容編碼和字符集編碼進行解壓縮和解碼
目前沒有找到.NET框架內置的解析方法,實際上HttpClient等類在內部應該已經實現了解析,但不知為什麼沒有公開這些處理方法。(亦或是我沒找到)
那麼只能自己來解析這些數據了。
我們先來看看這個經過gzip壓縮的文本內容的HTTP報文:

這裡提供一個老外寫的簡陋的解析類(已經過修改,原代碼中存在一些嚴重BUG):
/// <summary>
/// HTTP header fields recognised by <c>HTTPHeader</c>.
/// </summary>
/// <remarks>
/// Each member's numeric value is used as the index of that field's value in
/// <c>HTTPHeader.HTTPField</c>, so the values must stay contiguous and start at 0.
/// When a message is parsed, the member name is turned into the on-the-wire
/// header name by swapping every underscore for a hyphen
/// (e.g. <c>Content_Type</c> is looked up as "Content-Type").
/// </remarks>
public enum HTTPHeaderField
{
Accept = 0,
Accept_Charset = 1,
Accept_Encoding = 2,
Accept_Language = 3,
Accept_Ranges = 4,
Authorization = 5,
Cache_Control = 6,
Connection = 7,
Cookie = 8,
Content_Length = 9,
Content_Type = 10,
Date = 11,
Expect = 12,
From = 13,
Host = 14,
If_Match = 15,
If_Modified_Since = 16,
If_None_Match = 17,
If_Range = 18,
If_Unmodified_Since = 19,
Max_Forwards = 20,
Pragma = 21,
Proxy_Authorization = 22,
Range = 23,
Referer = 24,
TE = 25,
Upgrade = 26,
User_Agent = 27,
Via = 28,
// NOTE(review): "Warn" does not look like a standard header name and
// Warning = 50 also exists; kept because removing it would shift indices — confirm.
Warn = 29,
Age = 30,
Allow = 31,
Content_Encoding = 32,
Content_Language = 33,
Content_Location = 34,
Content_Disposition = 35,
Content_MD5 = 36,
Content_Range = 37,
ETag = 38,
Expires = 39,
Last_Modified = 40,
Location = 41,
Proxy_Authenticate = 42,
Refresh = 43,
Retry_After = 44,
Server = 45,
Set_Cookie = 46,
Trailer = 47,
Transfer_Encoding = 48,
Vary = 49,
Warning = 50,
// Highest value: the HTTPField array must hold (this value + 1) entries.
WWW_Authenticate = 51
};
/// <summary>
/// Splits a raw HTTP message (request or response) into its known header
/// fields and its body bytes.
/// </summary>
/// <remarks>
/// The header section is everything before the first blank line (CRLF CRLF).
/// Header values are stored in <see cref="HTTPField"/>, indexed by the numeric
/// value of the matching <c>HTTPHeaderField</c> member; fields that are absent
/// stay <c>null</c>. The bytes following the blank line are exposed, untouched
/// (no decompression, no charset decoding), through <see cref="Data"/>.
/// Only the first occurrence of each header is recorded.
/// </remarks>
class HTTPHeader
{
    // Blank line that terminates the header section of an HTTP message.
    private const string HeaderTerminator = "\r\n\r\n";

    #region PROPERTIES
    // One slot per HTTPHeaderField member (the enum values are contiguous from 0).
    private string[] m_StrHTTPField = new string[Enum.GetValues(typeof(HTTPHeaderField)).Length];

    // Empty until a body is found, so Data.Length reflects the real body size.
    private byte[] m_byteData = new byte[0];

    /// <summary>
    /// Header values indexed by <c>(int)HTTPHeaderField</c>; <c>null</c> when the field is absent.
    /// </summary>
    public string[] HTTPField
    {
        get { return m_StrHTTPField; }
        set { m_StrHTTPField = value; }
    }

    /// <summary>
    /// Raw body bytes that follow the header section; empty when the message has no body.
    /// </summary>
    public byte[] Data
    {
        get { return m_byteData; }
        set { m_byteData = value; }
    }
    #endregion

    // ASCII maps every byte to exactly one char, so a character index in the
    // decoded text is also a valid byte offset into the original buffer.
    System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();

    #region CONSTRUCTORS
    /// <summary>
    /// Default constructor - not used.
    /// </summary>
    private HTTPHeader()
    { }

    /// <summary>
    /// Parses a complete HTTP message.
    /// </summary>
    /// <param name="ByteHTTPRequest">Raw bytes of the message (headers, blank line, optional body).</param>
    /// <exception cref="ArgumentNullException"><paramref name="ByteHTTPRequest"/> is <c>null</c>.</exception>
    public HTTPHeader(byte[] ByteHTTPRequest)
    {
        if (ByteHTTPRequest == null)
        {
            throw new ArgumentNullException(nameof(ByteHTTPRequest));
        }

        string message = encoding.GetString(ByteHTTPRequest);
        int headerEnd = message.IndexOf(HeaderTerminator, StringComparison.Ordinal);

        if (headerEnd < 0)
        {
            // No blank line: the whole buffer is header text and there is no body.
            HTTPHeaderParse(message);
            return;
        }

        // Copy everything after the blank line as the body, whatever the message size.
        int bodyStart = headerEnd + HeaderTerminator.Length;
        byte[] body = new byte[ByteHTTPRequest.Length - bodyStart];
        Array.Copy(ByteHTTPRequest, bodyStart, body, 0, body.Length);
        Data = body;

        HTTPHeaderParse(message.Substring(0, headerEnd));
    }
    #endregion

    #region METHODS
    /// <summary>
    /// Looks up every known header field in the header text and stores its trimmed value.
    /// </summary>
    /// <param name="Header">Header section of the message, without the terminating blank line.</param>
    private void HTTPHeaderParse(string Header)
    {
        foreach (HTTPHeaderField field in Enum.GetValues(typeof(HTTPHeaderField)))
        {
            // The leading '\n' anchors the match at the start of a line, so that
            // "Cookie" is not found inside "Set-Cookie". The start-line (request
            // or status line) has no preceding '\n' and is therefore never matched.
            string prefix = "\n" + field.ToString().Replace('_', '-') + ":";

            // Field names are case-insensitive, and the space after ':' is optional.
            int start = Header.IndexOf(prefix, StringComparison.OrdinalIgnoreCase);
            if (start == -1)
            {
                // Field not present: move on to the next one.
                continue;
            }

            int valueStart = start + prefix.Length;
            int lineEnd = Header.IndexOf('\n', valueStart);
            string value = lineEnd == -1
                ? Header.Substring(valueStart)
                : Header.Substring(valueStart, lineEnd - valueStart);

            // Trim removes the optional leading space and the trailing '\r'.
            m_StrHTTPField[(int)field] = value.Trim();
        }
    }
    #endregion
}
編寫以下代碼以實現解析文件:
/// <summary>
/// Console driver: repeatedly asks for the path of a file containing a raw
/// HTTP message, parses it with <c>HTTPHeader</c> and prints the header
/// fields that were found, the sizes, and the body decoded as UTF-8.
/// </summary>
class Program
{
    /// <summary>
    /// Prompts for file paths until standard input is closed.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        while (true)
        {
            Console.WriteLine("輸出待解析的HTTP報文數據文件完全途徑:");
            var filename = Console.ReadLine();
            if (filename == null)
            {
                // End of input (e.g. redirected stdin ran out): stop instead of
                // looping forever on the same failure.
                break;
            }

            try
            {
                // Opens the file read-only and always releases the handle,
                // even when reading or parsing fails.
                byte[] data = File.ReadAllBytes(filename);
                var header = new HTTPHeader(data);

                string[] fields = header.HTTPField;
                for (int x = 0; x < fields.Length; x++)
                {
                    string f = fields[x];
                    if (!String.IsNullOrEmpty(f))
                    {
                        Console.WriteLine($"[{x:00}] - {(HTTPHeaderField) x} : {f}");
                    }
                }

                Console.WriteLine($"總數據尺寸{data.Length}字節,現實數據尺寸{header.Data.Length}字節");
                // The body is printed as-is: no gzip decompression or charset detection yet.
                Console.WriteLine(Encoding.UTF8.GetString(header.Data));
                Console.WriteLine();
            }
            catch (Exception e)
            {
                // Report the problem (bad path, unreadable file, ...) and ask again.
                Console.WriteLine(e);
            }
        }
    }
}
這裡還未實現gzip解壓縮和字符解碼,直接用UTF8解碼輸出的。(需要時再寫吧,都是體力活兒~)
效果圖展示:


上面的圖是沒有經過gzip壓縮的數據。
以上就是用C#如何解析HTTP報文的全部內容,哪位大俠還有好的方法,歡迎提出寶貴意見,希望大家喜歡以上內容。