爬蟲—天眼查接口函數

本文為轉載（查看原文）　2016-04-13 14:17　8613　python

from selenium import webdriver
import time
import re
from bs4 import BeautifulSoup
import urllib

# Fetch basic company information from Tianyancha.
def get_enterprise_data(ename):
    """Scrape the basic-information fields of a company from tianyancha.com.

    Searches the site for ``ename`` in a PhantomJS browser, clicks the
    element with class ``query_name`` (the first search hit), and collects
    text from the page that is shown afterwards.

    Args:
        ename: Company name (or keyword) to search for.

    Returns:
        A list of strings. The first item is the company name extracted from
        the page title (the text after the first '】' and before '信息查詢');
        the remaining items are the stripped text of every ``<p>`` element
        matched by ``class_="ng-binding ng-scope"``.

    Exceptions raised by Selenium (for example when the search page has no
    ``query_name`` element) propagate to the caller; the browser process is
    shut down in every case.
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import quote

    # Search page URL (the keyword is percent-encoded).
    url = 'http://www.tianyancha.com/search/' + quote(ename)

    driver = webdriver.PhantomJS(executable_path='/root/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')
    try:
        # Load the search results page.
        driver.maximize_window()
        driver.get(url)
        time.sleep(2)  # fixed delay to let the page finish loading

        # Click the first search result.
        driver.find_element_by_class_name('query_name').click()
        time.sleep(2)  # fixed delay to let the detail page finish loading

        # Snapshot everything we need before the browser is closed.
        page_source = driver.page_source
        page_title = driver.title
    finally:
        # Always terminate the PhantomJS process, even on failure, so that
        # repeated calls do not leak browser processes.
        driver.quit()

    # Parse the detail page and pick out the required fields.
    soup = BeautifulSoup(page_source, "html.parser")
    basic_info_list = soup.find_all('p', class_="ng-binding ng-scope")

    # The title is expected to look like '【…】<company name>信息查詢…'.
    # If the '】' marker is missing, fall back to the whole title instead of
    # raising IndexError.
    title_parts = page_title.split('】')
    name_part = title_parts[1] if len(title_parts) > 1 else title_parts[0]
    company_name = name_part.split('信息查詢')[0]

    data = [company_name]
    data.extend(tag.get_text().strip() for tag in basic_info_list)
    return data

# Example run: look up one company and print the scraped fields.
company_fields = get_enterprise_data('科潤智能')
print(company_fields)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找：學習Python3 天眼查爬蟲｜爬蟲案例—中基協、天眼查數據爬取｜天眼查sign 算法破解｜python應用：selenium之爬取天眼查信息｜【java爬蟲】---爬蟲+基於接口的網絡爬蟲｜使用 SpringBoot 寫增刪改查接口｜python之retry函數（爬蟲可用）｜python爬蟲（二） urlparse和urlsplit函數｜c++ 查重+排序函數｜函數式接口