C# 抓取頁面table數據並分析到數據庫


//抓取數據並做分析(保存到數據庫)

public partial class Form1 : Form
{
// Table of parsed flight records; GetData replaces it on every parse and
// worker_RunWorkerCompleted binds it to the grid.
private DataTable dt = new DataTable();

/// <summary>
/// Builds the form and hides the status label and progress bar.
/// </summary>
public Form1()
{
    InitializeComponent();

    // Both indicators are made visible again by btnSearch_Click.
    progressBar1.Visible = false;
    label.Visible = false;
}

/// <summary>
/// Downloads the flight-result page, cuts out the HTML that lies between the
/// outbound-flight-table marker comment and the outbound-paging marker comment,
/// and hands that fragment to <c>GetData</c> for parsing.
/// </summary>
/// <remarks>
/// Returns without parsing when either marker is missing or when the fixed
/// offsets would fall outside the text. A <see cref="WebException"/> raised by
/// the download is reported in a message box and not rethrown.
/// </remarks>
public void Bind()
{
    // Number of characters dropped starting at the start marker. The marker itself is
    // 15 characters long; the extra 3 presumably cover trailing whitespace/line break
    // of the target page -- TODO confirm against the real page.
    const int startSkip = 18;
    // Number of characters in front of the end marker that are also cut off.
    // NOTE(review): tuned to the target page's markup -- confirm if the page changes.
    const int endTrim = 86;

    string firstPage = "C:\\Documents and Settings\\Administrator\\桌面\\c.html";
    //string firstPage = "http://www.linkosky.com/UI/AirTicket/SingleFlightShowAllV.aspx? CT=00&JT=01&OC=SHA&DD=2010-05-12&DT=00&DC=PEK&AL=ALL&DR=true&ET=True&SPID=00015032&ORGID=15144";
    try
    {
        string pageHtml;
        // Dispose the client as soon as the bytes have been read.
        using (WebClient astoWebClient = new WebClient())
        {
            // Authenticate with the credentials of the current security context.
            astoWebClient.Credentials = CredentialCache.DefaultCredentials;
            Byte[] pageData = astoWebClient.DownloadData(firstPage);
            // Decode with the system default ANSI code page (the original author expected GB2312);
            // switch to Encoding.UTF8 for a UTF-8 page.
            pageHtml = Encoding.Default.GetString(pageData).Trim();
        }

        int m = pageHtml.IndexOf(" <!-- 去程航班表 -->");
        if (m == -1 || m + startSkip > pageHtml.Length)
        {
            return; // start marker not found (or nothing follows it): no data
        }
        string pageText = pageHtml.Remove(0, m + startSkip);

        int n = pageText.IndexOf("<!--去程航班分頁控制-->");
        if (n < endTrim)
        {
            // Covers n == -1 (end marker missing) as well as a marker too close to the
            // start; Remove(n - endTrim) would throw ArgumentOutOfRangeException here.
            return;
        }
        string keyText = pageText.Remove(n - endTrim);
        GetData(keyText);
    }
    catch (WebException webEx)
    {
        MessageBox.Show(webEx.ToString());
    }
}

/// <summary>
/// Parses an HTML table fragment into <c>dt</c> and stores every completed record
/// through <c>add</c>.
/// </summary>
/// <remarks>
/// The first <c>&lt;tr&gt;</c> (index 0) is skipped. The remaining rows are read in
/// groups of three by index: a row with <c>index % 3 == 1</c> starts a new record and
/// fills columns 0-4, the row with <c>index % 3 == 2</c> fills columns 5-9 and
/// commits the record, and rows with <c>index % 3 == 0</c> are ignored. Only the
/// first five cells of each row are used.
/// </remarks>
/// <param name="ddd">HTML text containing the table rows to parse.</param>
private void GetData(string ddd)
{
    const int cellsPerHtmlRow = 5;
    const string rowPattern = @"<tr[^>]*>[\s\S]*?<\/tr>";
    const string columnPattern = @"<td[^>]*>[\s\S]*?<\/td>";
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture;

    dt = new DataTable();
    dt.Columns.Add(new System.Data.DataColumn("航空公司", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("航班號", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("機型", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("起飛時間-城市", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("到達時間-城市", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("艙位類型", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("剩余座位", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("票面價", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("返點", typeof(System.String)));
    dt.Columns.Add(new System.Data.DataColumn("凈價", typeof(System.String)));

    System.Data.DataRow dr = dt.NewRow();
    MatchCollection rowCollection = Regex.Matches(ddd, rowPattern, options);

    // Index 0 is skipped on purpose (the loop has always started at 1).
    for (int i = 1; i < rowCollection.Count; i++)
    {
        int position = i % 3;
        if (position == 0)
        {
            continue; // every third row carries nothing that is stored
        }

        // position 1 -> first half of a record, position 2 -> second half.
        // (For i = 1 and i = 2 this matches the former i % 2 test exactly.)
        bool isFirstHalf = position == 1;
        if (isFirstHalf)
        {
            dr = dt.NewRow();
        }
        int columnOffset = isFirstHalf ? 0 : cellsPerHtmlRow;

        MatchCollection columnCollection = Regex.Matches(rowCollection[i].Value, columnPattern, options);
        int usedCells = Math.Min(columnCollection.Count, cellsPerHtmlRow);
        for (int j = 0; j < usedCells; j++)
        {
            dr[columnOffset + j] = GetCellInnerHtml(columnCollection[j].Value);
        }

        if (!isFirstHalf)
        {
            // The record is complete: keep it in the table and persist it.
            dt.Rows.Add(dr);
            add(dr[0].ToString(), dr[1].ToString(), dr[2].ToString(), dr[3].ToString(), dr[4].ToString(), dr[5].ToString(), dr[6].ToString(), dr[7].ToString(), dr[8].ToString(), dr[9].ToString());
        }
    }
}

/// <summary>
/// Returns the text between the end of the opening <c>&lt;td ...&gt;</c> tag and the
/// closing <c>&lt;/td&gt;</c> tag of a single matched cell.
/// </summary>
/// <param name="cellHtml">One complete cell as matched by the column pattern.</param>
private static string GetCellInnerHtml(string cellHtml)
{
    int openTagEnd = cellHtml.IndexOf('>');
    // The cell was matched case-insensitively, so the closing tag must be located the
    // same way; a case-sensitive search returned -1 for "</TD>" and made Substring throw.
    int closeTagStart = cellHtml.IndexOf("</td>", openTagEnd, StringComparison.OrdinalIgnoreCase);
    return cellHtml.Substring(openTagEnd + 1, closeTagStart - openTagEnd - 1);
}

/// <summary>
/// Stores one flight record by running the <c>tAirline_Add</c> stored procedure.
/// </summary>
/// <remarks>
/// Each argument is passed as the SQL parameter of the same name prefixed with
/// <c>@</c>. Exceptions from the helper propagate to the caller with their original
/// stack trace; the previous <c>catch</c>/<c>throw e;</c> only reset that trace.
/// NOTE(review): the bool returned by <c>RunProcedureReturnBool</c> is ignored, as
/// before -- confirm whether a false result should be surfaced.
/// </remarks>
public void add(string fAirlineName,string fAirlineNo, string fAirlineType, string fsTime_City, string feTime_City, string fSeatType,string fSeatNum, string fPrice, string fBackNum, string fNetPrice)
{
    SqlParameter[] ps = new SqlParameter[]
    {
        new SqlParameter("@fAirlineName",fAirlineName),
        new SqlParameter("@fAirlineNo",fAirlineNo),
        new SqlParameter("@fAirlineType",fAirlineType),
        new SqlParameter("@fsTime_City",fsTime_City),
        new SqlParameter("@feTime_City",feTime_City),
        new SqlParameter("@fSeatType",fSeatType),
        new SqlParameter("@fSeatNum",fSeatNum),
        new SqlParameter("@fPrice",fPrice),
        new SqlParameter("@fBackNum",fBackNum),
        new SqlParameter("@fNetPrice",fNetPrice)
    };

    WindowsFormsApplication1.SqlHelper.RunProcedureReturnBool("tAirline_Add", ps);
}


--------------------- 以下數據和方法是用於在WinForm下執行(以上部分是關鍵)----------------------------------

/// <summary>
/// Click handler of the search button: shows the busy indicators, disables the
/// button and starts a new background worker.
/// </summary>
private void btnSearch_Click(object sender, EventArgs e)
{
    // Switch the form into its "busy" state.
    label.Text = "請稍后,系統正在解析數據...";
    label.Visible = true;
    progressBar1.Visible = true;
    btnSearch.Enabled = false;

    // A fresh worker is created for every click and kept in the field.
    worker = new BackgroundWorker
    {
        WorkerReportsProgress = true,
        WorkerSupportsCancellation = true
    };
    worker.DoWork += worker_DoWork;
    worker.ProgressChanged += worker_ProgressChanged;
    worker.RunWorkerCompleted += worker_RunWorkerCompleted;

    worker.RunWorkerAsync();
}

/// <summary>
/// Runs when the background worker has finished: calls <c>Bind</c>, then shows the
/// outcome (cancelled, error, or the parsed table in the grid).
/// </summary>
/// <remarks>
/// The search button is re-enabled and the progress bar reset and hidden in every
/// case. Previously that happened only on success, so a cancelled or failed run, or
/// an exception thrown by <c>Bind</c>, left the button disabled.
/// </remarks>
private void worker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    try
    {
        Bind();
        if (e.Cancelled)
        {
            label.Text = "Cancelled";
        }
        else if (e.Error != null)
        {
            label.Text = "Error";
        }
        else
        {
            // Bind the grid only when the parse produced rows.
            if (dt != null && dt.Rows.Count > 0)
            {
                dataGridView1.DataSource = dt;
            }
            label.Visible = false;
        }
    }
    catch (Exception exts)
    {
        // Do not leave the "parsing..." text on screen after a failure.
        label.Text = "Error";
        MessageBox.Show(exts.ToString());
    }
    finally
    {
        // Always return the form to an interactive state.
        btnSearch.Enabled = true;
        progressBar1.Value = 0;
        progressBar1.Visible = false;
    }
}

/// <summary>
/// DoWork handler: passes the sending worker and the event arguments on to
/// <c>MoveList</c>.
/// </summary>
private void worker_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker sendingWorker = (BackgroundWorker)sender;
    MoveList(sendingWorker, e);
}

// Worker created by the most recent search click.
private BackgroundWorker worker = null;

/// <summary>
/// Reports progress in ten steps of 10 %, pausing 500 ms after each step, and stops
/// early when cancellation has been requested.
/// </summary>
/// <param name="backgroundWorker">Worker that is running this method; used for the
/// cancellation check and the progress reports.</param>
/// <param name="e">DoWork arguments; <c>Cancel</c> is set to true when the loop stops
/// because of a pending cancellation.</param>
private void MoveList(BackgroundWorker backgroundWorker, DoWorkEventArgs e)
{
    const int steps = 10;
    const int percentPerStep = 100 / steps;

    for (int i = 0; i < steps; i++)
    {
        // Use the worker handed in by the caller: the `worker` field may be null or
        // may have been replaced by a later click while this loop is still running.
        if (backgroundWorker.CancellationPending)
        {
            e.Cancel = true;
            break;
        }

        backgroundWorker.ReportProgress((i + 1) * percentPerStep, i);
        Thread.Sleep(500);
    }
}

/// <summary>
/// ProgressChanged handler: copies the reported percentage to the progress bar.
/// </summary>
private void worker_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    int percent = e.ProgressPercentage;
    progressBar1.Value = percent;
}

 

   最後執行結果如下圖:

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM