python3 爬蟲之爬取安居客二手房資訊(第一版)

本文轉載自查看原文 2017-04-17 00:27 2968

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Tsukasa



import requests
from bs4 import BeautifulSoup
import pandas
import time


# Listing-page URLs to crawl; filled in by the main script further down.
url_all = []
# City code used as the sub-domain of fang.com (e.g. "zs", "gz").  Surrounding
# whitespace is trimmed because the value is interpolated into a host name,
# where a stray space would produce an invalid URL.
url_in = input('輸入你所需要城市的字母簡寫：\n如：中山 zs ， 廣州 gz\n！！！不要亂輸入，不然運行不了').strip()
# Exclusive upper bound for range(1, url_number): entering N crawls pages 1..N.
url_number = 1 + int(input('輸入爬取頁數：'))

# NOTE(review): not referenced anywhere in the visible script; kept in case
# other code relies on the name.
okl = []
def open(nobe):
    """Fetch one listing page and return the detail-page URLs found on it.

    NOTE: this name shadows the builtin ``open`` for the rest of the module.
    It is kept unchanged because the main script calls it by this name.

    Args:
        nobe: URL of a listing (search-result) page.

    Returns:
        A list of absolute detail-page URLs, one for each ``.houseList dl``
        entry that contains a ``.title a`` link with an ``href``.

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the request times out.
    """
    from urllib.parse import urljoin

    # A timeout keeps a stalled connection from hanging the crawl forever.
    res = requests.get(nobe, timeout=30)
    soup = BeautifulSoup(res.text, 'html5lib')
    url_start = 'http://esf.' + url_in + '.fang.com'
    http_start = []
    for title in soup.select('.houseList dl'):
        links = title.select('.title a')
        # Entries without a usable title link are skipped instead of raising
        # IndexError/KeyError and aborting the whole page.
        if not links or not links[0].get('href'):
            continue
        # urljoin gives the same result as plain concatenation for
        # root-relative hrefs and also handles absolute hrefs correctly.
        http_start.append(urljoin(url_start, links[0]['href']))
    return http_start


# Fetch the detailed information of a single listing
def content(url):
    """Download one listing detail page and collect its fields into a dict.

    Args:
        url: URL of the listing detail page.

    Returns:
        A dict holding the page URL ('網頁'), the title ('標題'), the total
        price ('總價'), the contact phone ('聯系電話'), plus one entry for
        every ``<span>`` containing '發布時間' and every ``<dd>`` whose text
        has the form ``key：value`` (full-width colon).  Only the first colon
        separates key from value, so values may contain further colons.

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the request times out.
        IndexError: if the page lacks an ``h1``, ``.red20b`` or
            ``#mobilecode`` element, or has a publish-time span but no
            ``#Time`` element.
    """
    info = {'網頁': url}
    # A timeout keeps a stalled connection from hanging the crawl forever.
    res = requests.get(url, timeout=30)
    soup = BeautifulSoup(res.text, 'html5lib')
    info['標題'] = soup.select('h1')[0].text.strip()      # title
    info['總價'] = soup.select('.red20b')[0].text + '萬'   # total price
    info['聯系電話'] = soup.select('#mobilecode')[0].text   # phone number

    # Publish time: the span text looks like "key：value(", and the value is
    # combined with the text of the #Time element.
    for sl in soup.select('span'):
        text = sl.text.strip()
        if '發布時間' not in text:
            continue
        # partition() splits on the first colon only, so extra colons in the
        # value no longer break the two-target unpacking.
        key, sep, value = text.rstrip('(').partition('：')
        if sep:
            info[key] = value + '*' + soup.select('#Time')[0].text

    # Detail fields: every <dd> of the form "key：value".
    for dd in soup.select('dd'):
        key, sep, value = dd.text.strip().partition('：')
        if sep:
            info[key] = value
    return info




print('----------正在運行，請不要關閉----------')
# Listing pages follow the pattern .../house/i3<page>/ on the city sub-domain.
url_home = 'http://esf.' + url_in + '.fang.com/house/i3{}/'
url_all.extend(url_home.format(page) for page in range(1, url_number))

# One dict per successfully scraped listing; becomes one spreadsheet row each.
home = []
for page_url in url_all:
    # Announce the page before fetching so the message reflects what is
    # actually in progress.
    print('正在獲取 -----> ', page_url, ' <-----')
    try:
        detail_urls = open(page_url)
    except (requests.RequestException, IndexError) as err:
        # A single bad listing page must not discard everything collected
        # so far; report it and move on to the next page.
        print('\t獲取失敗，已跳過 -> ', page_url, ' : ', err)
        detail_urls = []
    time.sleep(1)  # pause between listing-page requests
    for detail_url in detail_urls:
        print('\t正在獲取詳細信息 -> ', detail_url, ' <-----')
        try:
            home.append(content(detail_url))
        except (requests.RequestException, IndexError, ValueError) as err:
            # Skip listings that fail to download or lack the expected
            # elements instead of aborting the whole crawl.
            print('\t獲取失敗，已跳過 -> ', detail_url, ' : ', err)
        time.sleep(2)  # pause between detail-page requests

# Write whatever was collected to an Excel file in the working directory.
last = pandas.DataFrame(home)
last.to_excel('temp.xlsx', sheet_name='房源信息')
print('----------運行結束----------\n\n----------查看根目錄---------')


# Keep the console window open until the user presses Enter.
input('完成運行')

　　源碼先奉上，以後再填坑

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Python開發爬蟲之BeautifulSoup解析網頁篇：爬取安居客網站上北京二手房數據 python爬取安居客二手房網站數據（轉） python爬取安居客二手房網站數據（項目）爬取安居客二手房房屋信息 python3 爬蟲教學之爬取鏈家二手房（最下面源碼） //以更新源碼 python 爬取鏈家二手房信息 Python爬取鏈家二手房信息【Python爬蟲】：爬取58同城二手房的所有房產標題 Python爬蟲入門教程03：二手房數據爬取 python爬蟲：爬取鏈家深圳全部二手房的詳細信息