程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> .NET網頁編程 >> C# >> C#入門知識 >> 點滴積累【C#】---抓取頁面中想要的數據,

點滴積累【C#】---抓取頁面中想要的數據,

編輯:C#入門知識

點滴積累【C#】---抓取頁面中想要的數據,


效果

描述:此功能是抓取外國的一個檢測PM2.5的網站。實時讀取網站的數據,然後保存到數據庫裡面。每隔一小時刷新一次。

地址為:http://beijing.usembassy-china.org.cn/070109air.html

篩選後的地址為:http://utils.usembassy.gov/feed2js/feed2js.php?src=http%3A%2F%2Fwww.stateair.net%2Fweb%2Frss%2F1%2F1.xml&desc=1&num=7&targ=y&utf=y&pc=y&words=40&

思路:先抓取到頁面的所有數據,保存到txt裡面,再一行一行的讀取txt,然後用split,substring截取到自己想要的數據,最後保存到數據庫,在進行插入數據庫的時候查看一下是否已經存在,如果不存在則插入。

代碼

using System;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.Data.SqlClient;
using System.IO;
//using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
//using System.Threading.Tasks;

/********************************
 * 創建人:青蘋果
 * 創建時間:2015-12-28
 * 描述:獲取美利堅合眾國的 PM2.5
 * ******************************/

namespace GetUSAData
{
    class Program
    {
        //public static string GetURL = System.Configuration.ConfigurationSettings.AppSettings["GetURL"];//獲取數據的地址
        public static string GetURL = "http://utils.usembassy.gov/feed2js/feed2js.php?src=http%3A%2F%2Fwww.stateair.net%2Fweb%2Frss%2F1%2F1.xml&desc=1&num=7&targ=y&utf=y&pc=y&words=40&";
        public static string txtURL = System.Configuration.ConfigurationSettings.AppSettings["txtURL"];//保存為txt文件的路徑
        public static string conn = ConfigurationManager.ConnectionStrings["ConnectionString"].ToString();

        static void Main(string[] args)
        {

            LoadGO();
        }

        public static void LoadGO()
        {
            GetUSA();
            List<string[]> getlist = Read(txtURL);
            //刪除txt
            if (File.Exists(txtURL))
            {
                //如果存在則刪除
                File.Delete(txtURL);
            }
            if (getlist.Count > 0)
            {
                for (int i = getlist.Count-1; i >-1; i--)
                {
                    DateTime dtime = DateTime.Parse(getlist[i][0].ToString());
                    string getTime = dtime.ToString("yyyy-MM-dd HH:mm");
                    string controlTime = dtime.ToString("yyyy-MM-dd");
                    float LatestHourdata1 = float.Parse(getlist[i][2]);
                    int LatestHourdata2 = Convert.ToInt32(getlist[i][3]);
                    float Avgdata1 = 0;
                    int Avgdata2 = 0;
                    string Avgdata3 = getlist[i][4].ToString();

                    List<SqlParameter> listWhere = new List<SqlParameter>();
                    listWhere.Add(new SqlParameter("@strDatetime", controlTime));
                    string sqlSelect = @"SELECT count(*) as allcount,sum(LatestHourdata1) as LatestHourdata1_avg, sum(LatestHourdata2) as LatestHourdata2_avg
 FROM T_twitter  where ([LatestHourdata1] is not null
 or [LatestHourdata2] is not null or [Avgdata1] is not null
  or [AvgData2] is not null) and   CONVERT(varchar(100), [datetime], 23)=@strDatetime";

                    DataTable sumDT = ControlDB(sqlSelect, listWhere, "select");    //查詢總和用於計算日均值
                    if (sumDT.Rows.Count > 0)
                    {
                        foreach (DataRow itemDR in sumDT.Rows)
                        {
                            int allcount = Convert.ToInt32(itemDR["allcount"].ToString());    //數據庫中當前日期數量總和
                            if (allcount > 0)
                            {
                                if (itemDR["LatestHourdata1_avg"] != null)
                                {
                                    Avgdata1 = float.Parse(itemDR["LatestHourdata1_avg"].ToString());   //數據庫中LatestHourdata1_avg總和
                                    Avgdata1 = (Avgdata1 + LatestHourdata1) / (allcount + 1);//(數據庫的總和+最新的一條)/(數據庫的總和數量+1)=日平均值
                                }
                                if (itemDR["LatestHourdata2_avg"] != null)
                                {
                                    Avgdata2 = Convert.ToInt32(itemDR["LatestHourdata2_avg"].ToString());   //數據庫中LatestHourdata2_avg總和
                                    Avgdata2 = (Avgdata2 + LatestHourdata2) / (allcount + 1);//(數據庫的總和+最新的一條)/(數據庫的總和數量+1)=日平均值
                                }
                                //根據網站規則判斷PM2.5的平均嚴重性

                                if (Avgdata2 >= 0 && Avgdata2 <= 50)
                                {
                                    Avgdata3 = " Good (at 24-hour exposure at this level)";
                                }
                                else if (Avgdata2 >= 51 && Avgdata2 <= 100)
                                {
                                    Avgdata3 = " Moderate (at 24-hour exposure at this level)";
                                }
                                else if (Avgdata2 >= 101 && Avgdata2 <= 150)
                                {
                                    Avgdata3 = " Unhealthy for Sensitive Groups (at 24-hour exposure at this level)";
                                }
                                else if (Avgdata2 >= 151 && Avgdata2 <= 200)
                                {
                                    Avgdata3 = " Unhealthy (at 24-hour exposure at this level)";
                                }
                                else if (Avgdata2 >= 201 && Avgdata2 <= 300)
                                {
                                    Avgdata3 = " Very Unhealthy (at 24-hour exposure at this level)";
                                }
                                else
                                {
                                    Avgdata3 = " Hazardous (at 24-hour exposure at this level)";
                                }
                            }
                            else
                            {
                                Avgdata1 = LatestHourdata1;
                                Avgdata2 = LatestHourdata2;
                            }
                        }
                    }

                    List<SqlParameter> pars = new List<SqlParameter>();
                    pars.Add(new SqlParameter("@whereDatetime", getTime));
                    pars.Add(new SqlParameter("@datetime", getTime));
                    pars.Add(new SqlParameter("@LatestHourdata1", LatestHourdata1));
                    pars.Add(new SqlParameter("@LatestHourdata2", LatestHourdata2));
                    pars.Add(new SqlParameter("@LatestHourdata3", getlist[i][4].ToString()));
                    pars.Add(new SqlParameter("@Avgdata1", Avgdata1));
                    pars.Add(new SqlParameter("@Avgdata2", Avgdata2));
                    pars.Add(new SqlParameter("@Avgdata3", Avgdata3));

                    string sql = @"if not exists(select * from  dbo.T_twitter where  datetime=@whereDatetime) begin
insert T_twitter (datetime,LatestHourdata1,LatestHourdata2,LatestHourdata3,Avgdata1,AvgData2,AvgData3)
VALUES(@datetime,@LatestHourdata1,@LatestHourdata2,@LatestHourdata3,@Avgdata1,@Avgdata2,@Avgdata3) end";
                    ControlDB(sql, pars, "");//插入數據
                }
            }
        }

        /// <summary>
        /// 獲取頁面數據保存至txt
        /// </summary>
        public static void GetUSA()
        {
            WebRequest request = WebRequest.Create(GetURL);
            WebResponse response = request.GetResponse();
            StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312"));
            //reader.ReadToEnd() 表示取得網頁的源碼

            FileStream fs = new FileStream(txtURL, FileMode.Create);
            byte[] data = System.Text.Encoding.Default.GetBytes(reader.ReadToEnd());
            //開始寫入
            fs.Write(data, 0, data.Length);
            //清空緩沖區、關閉流
            fs.Flush();
            fs.Close();
        }

        /// <summary>
        /// 根據路徑讀取txt文件
        /// </summary>
        /// <param name="path">txt路徑</param>
        /// <returns></returns>
        public static List<string[]> Read(string path)
        {
            List<string[]> list = new List<string[]>();
            StreamReader sr = new StreamReader(path, Encoding.Default);
            String line;
            while ((line = sr.ReadLine()) != null)
            {
                int i = line.ToString().IndexOf("title");
                if (i > 0)
                {
                    string titleStr = line.ToString().Substring(i + 7); //截取到title後面的值
                    string[] titlelist = titleStr.Split('"');        //以"  截取
                    string titledata = titlelist[0];
                    string[] datalist = titledata.Split('&');  //以& 截取
                    string data = datalist[0];
                    string[] datastrlist = data.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);//以; 截取
                    list.Add(datastrlist);
                }
            }
            sr.Close();
            return list;
        }

        /// <summary>
        /// 增查表
        /// </summary>
        /// <returns></returns>
        public static DataTable ControlDB(string sql, List<SqlParameter> par, string type)
        {
            DataAccess controData = new DataAccess();
            DataTable resultDT = new DataTable();
            if (type == "select")
            {
                resultDT = controData.GetDataTable(sql, par.ToArray());
            }
            else
            {
                int result = controData.ExecuteSql(sql, par.ToArray());
            }
            return resultDT;
        }
    }
}

 Demo下載:

 http://files.cnblogs.com/files/xinchun/GetUSAData.zip

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved