爬取鏈家、貝殼、大唐二手房數據


一、大唐二手房

/// <summary>
/// Downloads one page of the Datang (Anjuke) second-hand housing list and
/// converts every listing item that can be parsed into a <see cref="HouseInfo"/>.
/// </summary>
/// <param name="page">Page number substituted into the list URL.</param>
/// <returns>
/// The listings that parsed successfully. Items whose markup does not match the
/// expected structure are skipped; an empty list is returned when the
/// <c>houselist-mod</c> container is not present in the page.
/// </returns>
public List<HouseInfo> GetData(int page = 1)
        {
            // Strips line breaks that the page markup leaves inside text nodes.
            string Clean(string text) => text.Replace("\r", "").Replace("\n", "");

            string baseurl = $"https://datang.anjuke.com/gongsi-esf/p{page}";
            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc = web.Load(baseurl);
            var list = new List<HouseInfo>();

            // GetElementbyId yields null when the container is missing (layout change,
            // anti-bot page, past the last page); return nothing instead of crashing.
            HtmlNode ul = doc.GetElementbyId("houselist-mod");
            if (ul == null)
            {
                return list;
            }

            foreach (var li in ul.ChildNodes)
            {
                // Only <li> elements are listings; text/whitespace nodes are ignored.
                if (!string.Equals(li.Name, "li", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                try
                {
                    var itemInfo = li.SelectSingleNode("div[@class='house-details']");
                    var titleNode = itemInfo.SelectSingleNode("div[@class='house-title']");
                    var url = titleNode.ChildNodes[1].Attributes["href"].Value;
                    var title = Clean(titleNode.InnerText);

                    var detail = itemInfo.SelectNodes("div[@class='details-item']");
                    var info = Clean(detail[0].InnerText);
                    var addressInfo = Clean(detail[1].InnerText);
                    var tag = Clean(itemInfo.SelectSingleNode("div[@class='tags-bottom']").InnerText);

                    var priceInfo = li.SelectSingleNode("div[@class='pro-price']");
                    var amount = Clean(priceInfo.SelectSingleNode("span[@class='price-det']").InnerText);
                    var price = Clean(priceInfo.SelectSingleNode("span[@class='unit-price']").InnerText);

                    // Split once and reuse: the address line is "<plot>&nbsp;<address>",
                    // the info line is "<type>|<area>|<floor>|<year ...>".
                    var addressParts = addressInfo.Split("&nbsp;");
                    var infoParts = info.Split('|');

                    list.Add(new HouseInfo
                    {
                        title = title.Trim(),
                        plot = addressParts[0].Trim(),
                        tag = tag,
                        url = url,
                        address = addressParts[1].Trim(),
                        addressInfo = addressInfo.Trim(),
                        area = infoParts[1].Trim(),
                        housetype = infoParts[0].Trim(),
                        floortype = infoParts[2].Trim(),
                        // Drop the trailing icon entity that follows the year text.
                        year = infoParts[3].Split(" &#xE147;")[0].Trim(),
                        itemInfo = info.Trim(),
                        city = addressParts[1].Split("-")[0],
                        amount = amount.Trim(),
                        price = price.Trim()
                    });
                }
                catch (Exception ex)
                {
                    // Best effort: one malformed listing must not abort the page,
                    // but leave a trace so markup changes can be noticed.
                    System.Diagnostics.Trace.TraceWarning($"Skipped a listing on {baseurl}: {ex.Message}");
                }
            }
            return list;
        }
View Code

二、貝殼二手房

/// <summary>
/// Downloads one page of the Beike (ke.com) second-hand housing list and
/// converts every listing item that can be parsed into a <see cref="HouseInfo"/>.
/// </summary>
/// <param name="page">Page number substituted into the list URL.</param>
/// <returns>
/// The listings that parsed successfully. Items whose markup does not match the
/// expected structure are skipped; an empty list is returned when the
/// <c>sellListContent</c> list is not present in the page.
/// </returns>
public List<HouseInfo> GetData(int page = 1)
        {
            // Strips line breaks that the page markup leaves inside text nodes.
            string Clean(string text) => text.Replace("\r", "").Replace("\n", "");

            string baseurl = $"https://cd.ke.com/ershoufang/pg{page}";
            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc = web.Load(baseurl);
            var list = new List<HouseInfo>();

            // SelectSingleNode yields null when nothing matches; return nothing
            // instead of throwing a NullReferenceException on ul.ChildNodes.
            HtmlNode ul = doc.DocumentNode.SelectSingleNode("//ul[@class='sellListContent']");
            if (ul == null)
            {
                return list;
            }

            foreach (var li in ul.ChildNodes)
            {
                // Listing items carry the "clear" class; other children are skipped.
                if (!li.HasClass("clear"))
                {
                    continue;
                }

                try
                {
                    var itemInfo = li.SelectSingleNode("div[@class='info clear']");
                    var titleNode = itemInfo.SelectSingleNode("div[@class='title']");
                    var url = titleNode.ChildNodes[1].Attributes["href"].Value;
                    var title = Clean(titleNode.InnerText);

                    var detail = itemInfo.SelectSingleNode("div[@class='address']");
                    var plot = Clean(detail.SelectSingleNode("div[@class='flood']").InnerText);

                    var info = Clean(detail.SelectSingleNode("div[@class='houseInfo']").InnerText);
                    var tag = Clean(detail.SelectSingleNode("div[@class='tag']").InnerText);

                    var priceInfo = detail.SelectSingleNode("div[@class='priceInfo']");
                    var amount = Clean(priceInfo.SelectSingleNode("div[@class='totalPrice']").InnerText);
                    var price = Clean(priceInfo.SelectSingleNode("div[@class='unitPrice']").InnerText);

                    // Info line is "<floor>|<year>|<type>|<area>|<orientation>"; split once.
                    var infoParts = info.Split('|');

                    list.Add(new HouseInfo
                    {
                        title = title.Trim(),
                        plot = plot.Trim(),
                        tag = tag.Trim(),
                        url = url.Trim(),
                        area = infoParts[3].Trim(),
                        // The original read index 3 for both area and orientation, so
                        // "toward" duplicated the area. Orientation is the field after
                        // the area; fall back to empty so listings lacking it are kept.
                        toward = infoParts.Length > 4 ? infoParts[4].Trim() : "",
                        housetype = infoParts[2].Trim(),
                        floortype = infoParts[0].Trim(),
                        year = infoParts[1].Trim(),
                        itemInfo = info.Trim(),
                        amount = amount.Trim(),
                        price = price.Trim()
                    });
                }
                catch (Exception ex)
                {
                    // Best effort: one malformed listing must not abort the page,
                    // but leave a trace so markup changes can be noticed.
                    System.Diagnostics.Trace.TraceWarning($"Skipped a listing on {baseurl}: {ex.Message}");
                }
            }
            return list;
        }
View Code

三、鏈家二手房

/// <summary>
/// Downloads one page of the Lianjia second-hand housing list and converts
/// every listing item that can be parsed into a <see cref="HouseInfo"/>.
/// </summary>
/// <param name="page">Page number substituted into the list URL.</param>
/// <returns>
/// The listings that parsed successfully. Items whose markup does not match the
/// expected structure are skipped; an empty list is returned when the
/// <c>sellListContent</c> list is not present in the page.
/// </returns>
public List<HouseInfo> GetData(int page = 1)
        {
            // Strips line breaks; a null input stays null and is rejected later by Trim().
            string Clean(string text) => text?.Replace("\r", "").Replace("\n", "");

            string baseurl = $"https://cd.lianjia.com/ershoufang/pg{page}";
            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc = web.Load(baseurl);
            var list = new List<HouseInfo>();

            // SelectSingleNode yields null when nothing matches; return nothing
            // instead of throwing a NullReferenceException on ul.ChildNodes.
            HtmlNode ul = doc.DocumentNode.SelectSingleNode("//ul[@class='sellListContent']");
            if (ul == null)
            {
                return list;
            }

            foreach (var li in ul.ChildNodes)
            {
                // Listing items carry the "clear" class; other children are skipped.
                if (!li.HasClass("clear"))
                {
                    continue;
                }

                try
                {
                    var itemInfo = li.SelectSingleNode("div[@class='info clear']");
                    var titleNode = itemInfo.SelectSingleNode("div[@class='title']");
                    var url = titleNode.ChildNodes[0].Attributes["href"].Value;
                    var title = Clean(titleNode.InnerText);

                    var detail = itemInfo.SelectSingleNode("div[@class='address']");
                    var plot = Clean(itemInfo.SelectSingleNode("div[@class='flood']").InnerText);

                    var info = Clean(detail.SelectSingleNode("div[@class='houseInfo']").InnerText);
                    var tag = Clean(itemInfo.SelectSingleNode("div[@class='tag']").InnerText);

                    var priceInfo = itemInfo.SelectSingleNode("div[@class='priceInfo']");
                    var amount = Clean(priceInfo.SelectSingleNode("div[@class='totalPrice']").InnerText);
                    var price = Clean(priceInfo.SelectSingleNode("div[@class='unitPrice']").InnerText);

                    // Split once and reuse instead of re-splitting for every field.
                    var infoParts = info.Split('|');
                    var plotParts = plot.Split('-');

                    list.Add(new HouseInfo
                    {
                        title = title.Trim(),
                        plot = plot.Trim(),
                        // Second "-"-separated segment of the location text, when present.
                        city = plotParts.Length > 1 ? plotParts[1].Trim() : "",
                        tag = tag.Trim(),
                        url = url.Trim(),
                        area = infoParts[1].Trim(),
                        // NOTE(review): index 2 is never read and orientation is taken
                        // from index 3 - confirm against the live page's field order.
                        toward = infoParts[3].Trim(),
                        housetype = infoParts[0].Trim(),
                        floortype = infoParts[4].Trim(),
                        year = infoParts[5].Trim(),
                        itemInfo = info.Trim(),
                        amount = amount.Trim(),
                        price = price.Trim()
                    });
                }
                catch (Exception ex)
                {
                    // Best effort: one malformed listing must not abort the page,
                    // but leave a trace so markup changes can be noticed.
                    System.Diagnostics.Trace.TraceWarning($"Skipped a listing on {baseurl}: {ex.Message}");
                }
            }
            return list;
        }
View Code

 

四、實體類

/// <summary>
/// Plain data holder for one second-hand housing listing. Every field is kept
/// as the text scraped from the page; no numeric parsing is performed here.
/// </summary>
public class HouseInfo
    {
        /// <summary>
        /// Listing title.
        /// </summary>
        public string title { get; set; }
        /// <summary>
        /// Listing details: the raw, unsplit info line that the individual
        /// fields (type, area, floor, year, ...) are taken from.
        /// </summary>
        public string itemInfo { get; set; }
        /// <summary>
        /// Full location line of the listing as shown on the page.
        /// </summary>
        public string addressInfo { get; set; }
        /// <summary>
        /// Link (href) of the listing, taken from the title element.
        /// </summary>
        public string url { get; set; }
        /// <summary>
        /// House type.
        /// </summary>
        public string housetype { get; set; }
        /// <summary>
        /// Floor area.
        /// </summary>
        public string area { get; set; }
        /// <summary>
        /// Floor type.
        /// </summary>
        public string floortype { get; set; }
        /// <summary>
        /// Year of construction.
        /// </summary>
        public string year { get; set; }
        /// <summary>
        /// Region / district of the listing.
        /// </summary>
        public string city { get; set; }
        /// <summary>
        /// Specific address.
        /// </summary>
        public string address { get; set; }
        /// <summary>
        /// Residential community (housing estate).
        /// </summary>
        public string plot { get; set; }
        /// <summary>
        /// Tags attached to the listing.
        /// </summary>
        public string tag { get; set; }
        /// <summary>
        /// Total price.
        /// </summary>
        public string amount { get; set; }
        /// <summary>
        /// Unit price (filled from the page's unit-price element).
        /// </summary>
        public string price { get; set; }
        /// <summary>
        /// Orientation (which direction the home faces).
        /// </summary>
        public string toward { get; set; }
    }
View Code

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM