編碼:UTF8 字節序:Little-Endian
返回多個字段信息(如:亞洲|中國|香港|九龍|油尖旺|新世界電訊|810200|Hong Kong|HK|114.17495|22.327115)
------------------------ 文件結構 ---------------------------
//文件頭 16字節(4-4-4-4)
[索引區第一條流位置][索引區最後一條流位置][前綴區第一條的流位置][前綴區最後一條的流位置]
//內容區 長度無限制
[地區信息][地區信息]……唯一不重複
//索引區 12字節(4-4-3-1)
[起始IP][結束IP][地區流位置][流長度]
//前綴區 9字節(1-4-4)
[0-255][索引區start索引][索引區end索引]
------------------------ 文件結構 ---------------------------
優勢:索引區分為[起始IP][結束IP][地區偏移][長度],減少多級偏移跳轉步驟和長度的解析,提高效率;
根據ip第一位字節作為前綴,解析出以這個數字為前綴的第一個索引和最後一個索引,縮小查詢區間,
然後在這區間再用二分查找快速查找到對應區間,效率提高幾個等級
壓縮:原版txt為15M,生成這種dat結構為2.45M
性能:普通電腦測試,解析,100萬ip耗時2.012439秒,1千萬耗時21.10258秒,好點的電腦測試會更高效
對比:相比其他dat更簡潔更高效
創建:qqzeng-ip 於 2015-08-01
/// <summary>
/// In-memory reader for the qqzeng-ip binary "dat" database.
/// </summary>
/// <remarks>
/// File layout (all integers little-endian):
/// <list type="bullet">
/// <item><description>Header, 16 bytes (4-4-4-4): offset of the first index record, offset of the
/// last index record, offset of the first prefix record, offset of the last prefix record.</description></item>
/// <item><description>Content area: the location strings, UTF-8 encoded.</description></item>
/// <item><description>Index area, 12 bytes per record (4-4-3-1): start IP, end IP, offset of the
/// location string, length of the location string.</description></item>
/// <item><description>Prefix area, 9 bytes per record (1-4-4): first octet of the IP, first index
/// record for that octet, last index record for that octet.</description></item>
/// </list>
/// A lookup narrows the search to the index records of the address's first octet and then
/// binary-searches that range.
/// </remarks>
public class IPSearch
{
    /// <summary>Size in bytes of the file header (four 32-bit offsets).</summary>
    private const int HeaderSize = 16;

    /// <summary>Size in bytes of one index-area record.</summary>
    private const int IndexEntrySize = 12;

    /// <summary>Size in bytes of one prefix-area record.</summary>
    private const int PrefixEntrySize = 9;

    /// <summary>Maps the first octet of an address to its range of index records.</summary>
    private readonly Dictionary<uint, PrefixIndex> prefixDict;

    /// <summary>The complete contents of the data file.</summary>
    private readonly byte[] data;

    private readonly long firstStartIpOffset; // file offset of the first index record
    private readonly long lastStartIpOffset;  // file offset of the last index record
    private readonly long prefixStartOffset;  // file offset of the first prefix record
    private readonly long prefixEndOffset;    // file offset of the last prefix record
    private readonly long ipCount;            // number of index records (IP ranges)
    private readonly long prefixCount;        // number of prefix records

    /// <summary>
    /// Loads the binary dat file into memory and builds the prefix lookup table.
    /// </summary>
    /// <param name="dataPath">Path of the dat file.</param>
    /// <exception cref="InvalidDataException">
    /// The file is shorter than the header, or the prefix area described by the header
    /// does not fit inside the file.
    /// </exception>
    public IPSearch(string dataPath)
    {
        // A single Stream.Read call may return fewer bytes than requested;
        // File.ReadAllBytes keeps reading until the whole file is in memory.
        data = File.ReadAllBytes(dataPath);
        if (data.Length < HeaderSize)
        {
            throw new InvalidDataException("IP data file is too short to contain a header: " + dataPath);
        }

        firstStartIpOffset = ReadUInt32(0);
        lastStartIpOffset = ReadUInt32(4);
        prefixStartOffset = ReadUInt32(8);
        prefixEndOffset = ReadUInt32(12);

        // The number of prefixes is not fixed at 256, so that partial databases
        // (domestic, foreign, global, ...) can use the same format.
        ipCount = (lastStartIpOffset - firstStartIpOffset) / IndexEntrySize + 1;
        prefixCount = (prefixEndOffset - prefixStartOffset) / PrefixEntrySize + 1;

        if (prefixCount < 0 || prefixStartOffset + prefixCount * PrefixEntrySize > data.Length)
        {
            throw new InvalidDataException("IP data file has a corrupt prefix area: " + dataPath);
        }

        // Build the first-octet -> index-range table straight from the file bytes.
        prefixDict = new Dictionary<uint, PrefixIndex>();
        for (long k = 0; k < prefixCount; k++)
        {
            long entry = prefixStartOffset + k * PrefixEntrySize;
            uint prefix = data[entry];
            prefixDict.Add(prefix, new PrefixIndex()
            {
                prefix = prefix,
                start_index = ReadUInt32(entry + 1),
                end_index = ReadUInt32(entry + 5)
            });
        }
    }

    /// <summary>
    /// Converts a dotted IPv4 address to its numeric value, with the first octet
    /// in the most significant byte.
    /// </summary>
    /// <param name="ip">IPv4 address text, e.g. "123.4.5.6".</param>
    /// <returns>The address as an unsigned 32-bit number.</returns>
    /// <exception cref="FormatException">
    /// <paramref name="ip"/> is not a valid address or is not an IPv4 address.
    /// </exception>
    public static uint IpToInt(string ip)
    {
        byte[] bytes = IPAddress.Parse(ip).GetAddressBytes();
        if (bytes.Length != 4)
        {
            // An IPv6 address has 16 bytes; using only the first four would give a meaningless number.
            throw new FormatException("Only IPv4 addresses are supported: " + ip);
        }
        return ((uint)bytes[0] << 24) | ((uint)bytes[1] << 16) | ((uint)bytes[2] << 8) | (uint)bytes[3];
    }

    /// <summary>
    /// Converts a numeric IPv4 value (as produced by <see cref="IpToInt"/>) back to dotted text.
    /// </summary>
    /// <param name="ip_Int">Address value with the first octet in the most significant byte.</param>
    /// <returns>The dotted IPv4 text.</returns>
    public static string IntToIP(uint ip_Int)
    {
        // IPAddress(long) expects network byte order, which would print the octets reversed
        // for values produced by IpToInt; build the address from explicit bytes instead.
        byte[] bytes = new byte[]
        {
            (byte)(ip_Int >> 24),
            (byte)(ip_Int >> 16),
            (byte)(ip_Int >> 8),
            (byte)ip_Int
        };
        return new IPAddress(bytes).ToString();
    }

    /// <summary>
    /// Looks up the multi-field location record for an IPv4 address.
    /// </summary>
    /// <param name="ip">IPv4 address text, e.g. "123.4.5.6".</param>
    /// <returns>
    /// The pipe-separated record stored in the data file, for example
    /// "Asia|China|Hong Kong|Kowloon|Yau Tsim Mong|New World Telecom|810200|Hong Kong|HK|114.17495|22.327115"
    /// (the actual text is whatever the file contains), or an empty string when no range
    /// covers the address.
    /// </returns>
    /// <exception cref="FormatException">
    /// <paramref name="ip"/> is not a valid IPv4 address.
    /// </exception>
    public string Query(string ip)
    {
        uint intIP = IpToInt(ip);

        // Take the prefix from the parsed value so it always agrees with intIP,
        // whatever textual form the parser accepted.
        uint ip_prefix_value = intIP >> 24;

        PrefixIndex range;
        if (!prefixDict.TryGetValue(ip_prefix_value, out range))
        {
            return "";
        }

        uint low = (uint)range.start_index;
        uint high = (uint)range.end_index;
        uint my_index = low == high ? low : BinarySearch(low, high, intIP);

        uint startIp;
        uint endIp;
        uint local_offset;
        uint local_length;
        GetIndex(my_index, out startIp, out endIp, out local_offset, out local_length);

        // The search only guarantees endIp >= intIP; confirm the range really covers the address.
        if (startIp <= intIP && intIP <= endIp)
        {
            return GetLocal(local_offset, local_length);
        }
        return "";
    }

    /// <summary>
    /// Binary search for the lowest index record in [low, high] whose end IP is not below k.
    /// </summary>
    /// <param name="low">First index record of the search range (inclusive).</param>
    /// <param name="high">Last index record of the search range (inclusive).</param>
    /// <param name="k">Numeric IP value being looked up.</param>
    /// <returns>The matching record number, or 0 when every record in the range ends below k.</returns>
    public uint BinarySearch(uint low, uint high, uint k)
    {
        uint M = 0;
        while (low <= high)
        {
            // Written this way so the midpoint cannot overflow.
            uint mid = low + (high - low) / 2;
            if (GetEndIp(mid) >= k)
            {
                M = mid; // mid is a candidate answer
                if (mid == low)
                {
                    // Nothing left below mid; stopping here also keeps "mid - 1"
                    // from wrapping around to uint.MaxValue when mid is 0.
                    break;
                }
                high = mid - 1;
            }
            else
            {
                low = mid + 1;
            }
        }
        return M;
    }

    /// <summary>
    /// Decodes one record of the index area.
    /// </summary>
    /// <param name="left">Number of the index record.</param>
    /// <param name="startip">Receives the numeric start IP of the range.</param>
    /// <param name="endip">Receives the numeric end IP of the range.</param>
    /// <param name="local_offset">Receives the file offset of the location string (3-byte value).</param>
    /// <param name="local_length">Receives the byte length of the location string (1-byte value).</param>
    private void GetIndex(uint left, out uint startip, out uint endip, out uint local_offset, out uint local_length)
    {
        // Widen before multiplying so the offset is computed in 64-bit arithmetic.
        long left_offset = firstStartIpOffset + (long)left * IndexEntrySize;
        startip = ReadUInt32(left_offset);
        endip = ReadUInt32(left_offset + 4);
        local_offset = (uint)data[left_offset + 8]
            | ((uint)data[left_offset + 9] << 8)
            | ((uint)data[left_offset + 10] << 16);
        local_length = (uint)data[left_offset + 11];
    }

    /// <summary>
    /// Reads only the end IP of an index record.
    /// </summary>
    /// <param name="left">Number of the index record.</param>
    /// <returns>The numeric end IP of that record.</returns>
    private uint GetEndIp(uint left)
    {
        long left_offset = firstStartIpOffset + (long)left * IndexEntrySize;
        return ReadUInt32(left_offset + 4);
    }

    /// <summary>
    /// Decodes a location string from the content area.
    /// </summary>
    /// <param name="local_offset">File offset of the string.</param>
    /// <param name="local_length">Length of the string in bytes.</param>
    /// <returns>The UTF-8 decoded text.</returns>
    private string GetLocal(uint local_offset, uint local_length)
    {
        // Decode directly from the file buffer; no temporary copy is needed.
        return Encoding.UTF8.GetString(data, (int)local_offset, (int)local_length);
    }

    /// <summary>
    /// Reads a little-endian unsigned 32-bit integer from the file buffer.
    /// </summary>
    /// <param name="offset">Offset of the first (least significant) byte.</param>
    /// <returns>The decoded value.</returns>
    private uint ReadUInt32(long offset)
    {
        return BytesToLong(data[offset], data[offset + 1], data[offset + 2], data[offset + 3]);
    }

    /// <summary>
    /// Combines four bytes into an unsigned 32-bit integer, little-endian.
    /// </summary>
    /// <param name="a">Least significant byte.</param>
    /// <param name="b">Second byte.</param>
    /// <param name="c">Third byte.</param>
    /// <param name="d">Most significant byte.</param>
    /// <returns>The combined value.</returns>
    private static uint BytesToLong(byte a, byte b, byte c, byte d)
    {
        return (uint)a | ((uint)b << 8) | ((uint)c << 16) | ((uint)d << 24);
    }
}
/*
(調用例子):
IPSearch finder = new IPSearch("qqzeng-ip.dat");
string result = finder.Query("1.2.3.4");
--> result="亞洲|中國|香港|九龍|油尖旺|新世界電訊|810200|Hong Kong|HK|114.17495|22.327115"
*/
/// <summary>
/// One record of the prefix area: the range of index records that belong
/// to a given first octet of an IP address.
/// </summary>
public class PrefixIndex
{
    /// <summary>First octet of the IP address (the 1-byte prefix value).</summary>
    public uint prefix { get; set; }

    /// <summary>Number of the first index record for this prefix.</summary>
    public long start_index { get; set; }

    /// <summary>Number of the last index record for this prefix.</summary>
    public long end_index { get; set; }
}
//將來將添加多種語言解析