關於使用HtmlAgilityPack


請直接看代碼:

 

復制代碼
         ///   <summary>
        
///  根據輸入的地址獲取其文檔節點對象
        
///   </summary>
        
///   <param name="url"> 地址 </param>
        
///   <returns></returns>
         public  static HtmlAgilityPack.HtmlNode GetHtmlNodeFromLink( string url)
        {
             try{
                Uri uri =  new Uri(url);

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
                WebResponse response = request.GetResponse();

                Stream stream = response.GetResponseStream();
                StreamReader read =  new StreamReader(stream, Encoding.GetEncoding( " gb2312 "));
                 string str = read.ReadToEnd();

                HtmlAgilityPack.HtmlDocument html =  new HtmlAgilityPack.HtmlDocument();
                html.LoadHtml(str);
                 return html.DocumentNode;
            }
             catch{ return  null;}
        }

         ///   <summary>
        
///  根據輸入的URL地址輸出指定XPATH下的節點集合
        
///   </summary>
        
///   <param name="url"> 地址 </param>
        
///   <param name="xPath"> 過濾地址 </param>
        
///   <param name="imgs"> 過濾地址 </param>
        
///   <param name="links"> 過濾地址 </param>
        
///   <param name="title"> 標題 </param>
        
///   <returns></returns>
         public  static  bool GetGalleryInfo(HtmlAgilityPack.HtmlNode htmlNode, string xPath, ref  string[] imgs,  ref  string[] links, ref  string[] title)
        {
             try
            {
                HtmlNodeCollection hnc = htmlNode.SelectNodes(xPath); // " // div[@class='slideBannerA homeSlideAD1']"
                 if (hnc.Count <  1)
                     return  false;
                links =  new  string[hnc.Count];
                title =  new  string[hnc.Count];
                imgs =  new  string[hnc.Count];
                 int i =  0;
                 string cateDataRegex =  @" background-image:url\((?<image>.+)\) ";
                Regex re =  new Regex(cateDataRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
                 foreach (HtmlNode node  in hnc)
                {
                    HtmlAttributeCollection hac = node.Attributes;
                    links[i] = hac[ " href "].Value;
                    imgs[i] = hac[ " style "] ==  null ? hac[ " src2 "].Value : re.Match(hac[ " style "].Value).Groups[ " image "].Value;
                    title[i++] =  string.IsNullOrEmpty(hac[ " title "].Value) ? hac[ " alt "].Value : hac[ " title "].Value;
                }
                 return  true;
            }
             catch {  return  false; }
        }
        
         // 調用 
        
             string[] strLink;
             string[] strLinAlt;
             string[] strImg;
             string urls =  " http://www.newegg.com.cn ";
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink(urls);
            GetGalleryInfo(nodes,  " //div[@class='slideBannerA homeSlideAD1']/div[1]/div[1]/a "out strImg,  out strLink, out strLinAlt);
復制代碼

 

淘寶今日活動:

復制代碼
/// <summary>
/// Extracts the link and image address of each Taobao "today's activity" item selected by
/// <paramref name="xPath"/>.
/// </summary>
/// <param name="htmlNode">Node to search from. A <c>null</c> node yields <c>false</c>.</param>
/// <param name="xPath">XPath expression selecting the item nodes.</param>
/// <param name="imgs">
/// Receives, per matched node, the <c>src</c> attribute of the first child of the node's
/// second child.
/// </param>
/// <param name="links">Receives, per matched node, the <c>href</c> attribute of the node's second child.</param>
/// <returns>
/// <c>true</c> when every matched node had the expected structure. <c>false</c> when
/// <paramref name="htmlNode"/> is <c>null</c> or nothing matched (both arrays are then empty),
/// or when a matched node lacks the expected children or attributes (the arrays are then
/// filled only up to that node).
/// </returns>
/// <example>
/// <code>
/// string[] strLink;
/// string[] strImg;
/// HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink("http://www.taobao.com");
/// GetTaobaoGalleryInfo(nodes, "//div[@class='sub-promotion-content']/div[@class='ks-switchable-content zoom']/ul/li", out strImg, out strLink);
/// </code>
/// </example>
public static bool GetTaobaoGalleryInfo(HtmlAgilityPack.HtmlNode htmlNode, string xPath, out string[] imgs, out string[] links)
{
    links = new string[0];
    imgs = new string[0];

    if (htmlNode == null)
    {
        return false;
    }

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection hnc = htmlNode.SelectNodes(xPath);
    if (hnc == null || hnc.Count < 1)
    {
        return false;
    }

    links = new string[hnc.Count];
    imgs = new string[hnc.Count];

    for (int i = 0; i < hnc.Count; i++)
    {
        HtmlNode item = hnc[i];

        // The link is read from child index 1 — presumably index 0 is a whitespace text
        // node in the page markup; verify against the live page.
        HtmlNode anchor = item.ChildNodes.Count > 1 ? item.ChildNodes[1] : null;
        HtmlNode image = anchor != null && anchor.ChildNodes.Count > 0 ? anchor.ChildNodes[0] : null;
        if (anchor == null || image == null)
        {
            return false;
        }

        HtmlAttribute href = anchor.Attributes["href"];
        HtmlAttribute src = image.Attributes["src"];
        if (href == null || src == null)
        {
            return false;
        }

        links[i] = href.Value;
        imgs[i] = src.Value;
    }

    return true;
}
復制代碼

 

 

復制代碼
  // 今日炸彈
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.newegg.com.cn/ ");

            HtmlAgilityPack.HtmlNode node = nodes.SelectSingleNode( " //div[@class='colSub']/div[@class='picBanner shellShocker ']/a "); // " // div[@class='slideBannerA homeSlideAD1']"
           
             string strImg = node.Attributes[ " href "].Value;
             string strSrc= node.ChildNodes[ 0].Attributes[ " src "].Value;
復制代碼

 

 

復制代碼
             // 淘寶類別活動
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.taobao.com ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //span[@class='category-pop']/a "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strText =  new  string[node.Count];

             try
            {
                 int i =  0;
                 foreach (HtmlNode htmlNode  in node)
                {
                    strLink[i] = htmlNode.Attributes[ " href "].Value;
                    strText[i++] = htmlNode.InnerText;
                }
            }
             catch { }
復制代碼

 

 

復制代碼
// Taobao apparel (fushi.taobao.com), new-product recommendations:
// collect link, caption markup and image address of every list item.
HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink("http://fushi.taobao.com");

// A failed download (null document) or an XPath with no match (null collection)
// results in empty arrays instead of a NullReferenceException.
HtmlAgilityPack.HtmlNodeCollection node = nodes == null
    ? null
    : nodes.SelectNodes("//div[@class='new-product-image-list']/ul[@class='image-list']/li");
int matchCount = node == null ? 0 : node.Count;

string[] strLink = new string[matchCount];
string[] strImg = new string[matchCount];
string[] strAlt = new string[matchCount];

// i counts the items stored so far; entries from index i onwards stay null.
int i = 0;
for (int n = 0; n < matchCount; n++)
{
    HtmlNode htmlNode = node[n];

    // Expected shape: the item's first child is the anchor, whose first child carries the
    // image and whose second child carries the caption. Items with another shape are skipped
    // individually instead of aborting the whole extraction.
    HtmlNode anchor = htmlNode.ChildNodes.Count > 0 ? htmlNode.ChildNodes[0] : null;
    if (anchor == null || anchor.ChildNodes.Count < 2)
    {
        continue;
    }

    strLink[i] = anchor.GetAttributeValue("href", string.Empty);
    strAlt[i] = anchor.ChildNodes[1].InnerHtml;
    strImg[i] = anchor.ChildNodes[0].GetAttributeValue("src", string.Empty);
    i++;
}
復制代碼

 

 

復制代碼
// VANCL knitwear recommendations: collect link, name, image, list price and current price
// of every product list item. The page is decoded as UTF-8.
HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink("http://rihan.vancl.com/", "UTF-8");

// A failed download (null document) or an XPath with no match (null collection)
// results in empty arrays instead of a NullReferenceException.
HtmlAgilityPack.HtmlNodeCollection node = nodes == null
    ? null
    : nodes.SelectNodes("//div[@class='prod_area']/ul/li");
int matchCount = node == null ? 0 : node.Count;

string[] strLink = new string[matchCount];
string[] strImg = new string[matchCount];
string[] strAlt = new string[matchCount];
string[] strPrice = new string[matchCount];
string[] strCurrentPrice = new string[matchCount];

// i counts the items stored so far; entries from index i onwards stay null.
int i = 0;
for (int n = 0; n < matchCount; n++)
{
    HtmlNode htmlNode = node[n];
    try
    {
        // Read every field into locals first so that an item failing half-way
        // never leaves a partially written row behind.
        string link = htmlNode.ChildNodes[0].Attributes["href"].Value;
        string alt = htmlNode.ChildNodes[4].ChildNodes[1].InnerHtml.Trim();
        string img = htmlNode.ChildNodes[0].ChildNodes[1].Attributes["src"].Value;
        string price = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");

        // NOTE(review): this page text was machine-converted to Traditional characters;
        // the prefix on the live (Simplified) site may be "售价¥" — confirm before relying on it.
        string currentPrice = htmlNode.ChildNodes[6].ChildNodes[2].InnerHtml.Trim().Replace("售價¥", "");

        strLink[i] = link;
        strAlt[i] = alt;
        strImg[i] = img;
        strPrice[i] = price;
        strCurrentPrice[i] = currentPrice;
        i++;
    }
    catch (ArgumentOutOfRangeException)
    {
        // The item has fewer child nodes than the expected layout: skip it.
    }
    catch (NullReferenceException)
    {
        // An expected attribute is missing on this item: skip it.
    }
}
復制代碼

 

 

復制代碼
         private  void button8_Click( object sender, EventArgs e)
        {
             // http://www.masamaso.com   商品列表
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.masamaso.com/ "" UTF-8 ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //ul/li/div[@class='goods_case'] "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] =  " http://www.masamaso.com/ " + htmlNode.ChildNodes[ 1].ChildNodes[ 0].Attributes[ " href "].Value;
                    strAlt[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 0].Attributes[ " title "].Value;
                    strImg[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    strCurrentPrice[i++] = htmlNode.ChildNodes[ 3].ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].InnerHtml.Trim().Replace( " &yen; """);
                }
                 catch 
                { }
            }
        }

         private  void button9_Click( object sender, EventArgs e)
        {
             // http://www.masamaso.com/   彈出廣告
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.masamaso.com/ "" UTF-8 ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //div[@class='foot_img tabContainer']/div[@class='tabBox']/div[@class='hd_tp'] "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] = htmlNode.ChildNodes[ 0].Attributes[ " href "].Value;
                     // strAlt[i] = htmlNode.ChildNodes[1].ChildNodes[0].Attributes["title"].Value;
                    strImg[i++] = htmlNode.ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    
// strCurrentPrice[i++] = htmlNode.ChildNodes[3].ChildNodes[1].ChildNodes[1].ChildNodes[0].InnerHtml.Trim().Replace("&yen;", "");
                }
                 catch 
                { }
            }

            Func();
        }

         private  void Func()
        {
             // http://www.vivian.cn/   彈出廣告
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.vivian.cn/ "" UTF-8 ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //div[@class='foot_img tabContainer']/div[@class='tabBox']/div[@class='hd_tp'] "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] = htmlNode.ChildNodes[ 0].Attributes[ " href "].Value;
                     // strAlt[i] = htmlNode.ChildNodes[1].ChildNodes[0].Attributes["title"].Value;
                    strImg[i++] = htmlNode.ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    
// strCurrentPrice[i++] = htmlNode.ChildNodes[3].ChildNodes[1].ChildNodes[1].ChildNodes[0].InnerHtml.Trim().Replace("&yen;", "");
                }
                 catch 
                { }
            }

        }

         private  void button10_Click( object sender, EventArgs e)
        {
             // http://www.vivian.cn/ "   產品列表
             HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.vivian.cn/ "" UTF-8 ");
             HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //div[@class='goods_list']/ul/li "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] =  " http://www.vivian.cn/ " + htmlNode.ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].Attributes[ " href "].Value;
                    strAlt[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].Attributes[ " title "].Value;
                    strImg[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    strCurrentPrice[i++] = htmlNode.ChildNodes[ 1].ChildNodes[ 3].ChildNodes[ 3].ChildNodes[ 0].ChildNodes[ 1].InnerHtml.Trim().Replace( " &yen; """);
                }
                 catch 
                { }
            }
        }
復制代碼

 

 

 

 

 

 


免責聲明!

本站轉載的文章僅供個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM