Python 3 example: fetching data with POST requests

The script below scrapes the team listing at tylaw.com.cn: it sends one POST request per page of the listing (incrementing the currentPage form field each time), extracts the detail-page hrefs with lxml/XPath, then requests each detail page and saves the name and email fields to a JSON file.


# coding=utf-8
import requests
from lxml import etree
import json

 

class TianYuan:

  def __init__(self):
    self.url = "http://www.tylaw.com.cn/CN/Team.aspx"
    self.headers = {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
    }

  # First send POST requests to collect the hrefs and build the detail URLs,
  # then request each detail page's HTML.

  # The POST form data only changes in its currentPage value, and the next-page URL
  # cannot be obtained from anywhere else, so we increment currentPage in a loop and
  # send one request per value, using the returned hrefs to build the detail URLs.
  def get_href(self):
    """Send one POST request per listing page and collect the detail-page hrefs."""
    # Define an empty list to hold the href lists from every page
    href_lists = []
    tempdata = {
      "__VIEWSTATEGENERATOR": "CB7A4B54",
      "Lan": "CN",
      "MenuID": "00000000000000000004",
      "currentPage": 1
    }
    for x in range(9):

      response = requests.post(self.url, data=tempdata, headers=self.headers)
      # After each request, increment currentPage by 1
      tempdata['currentPage'] += 1

      # Decode the response body and parse it into an HTML tree
      r = response.content.decode()
      h = etree.HTML(r)

      # Extract the hrefs from the listing page
      href_list = h.xpath('//h3/a/@href')
      href_lists.append(href_list)
    return href_lists

  def get_url_list(self, href_lists):
    url_list = []
    for href_list in href_lists:
      for i in href_list:
        url = "http://www.tylaw.com.cn/CN/{}".format(i)
        url_list.append(url) 
    return url_list

  def parse_url(self, url):
    response = requests.get(url, headers=self.headers)
    return etree.HTML(response.content.decode())

  def get_content_list(self, html):
    content_list = []
    item = {}
    # Extract the name and email text nodes from fixed positions on the detail page
    item["name"] = html.xpath('//*[@id="containerLawyer"]/div/div/div[2]/div[2]/div[1]/div[1]/div/div/text()[2]')[0].strip()
    item["email"] = html.xpath('//*[@id="containerLawyer"]/div/div/div[2]/div[2]/div[1]/div[7]/div/div/text()')[0].strip()
    # print(item)
    content_list.append(item)
    return content_list

  def save_content(self, content_list):
    # Append each item to tianyuan.json as a JSON object followed by a comma
    with open("tianyuan.json", "a") as f:
      for content in content_list:
        json.dump(content, f, ensure_ascii=False, indent=2)
        f.write(',\n')

  def run(self):
    """run() implements the main logic"""
    href_lists = self.get_href()
    url_list = self.get_url_list(href_lists)
    for url in url_list:
      html = self.parse_url(url)
      content_list = self.get_content_list(html)
      self.save_content(content_list)

if __name__ == '__main__':
  tianyuan = TianYuan()
  tianyuan.run()
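
Note that save_content appends each record as a JSON object followed by a comma, so tianyuan.json is not itself a valid JSON document. Below is a minimal sketch (not part of the original script) of reading the records back, assuming the file was produced by the code above and is UTF-8 encoded: strip the trailing comma and wrap the objects in a JSON array.

# Minimal sketch: load the records written by save_content above.
# Assumes tianyuan.json holds comma-separated JSON objects with a trailing comma.
import json

with open("tianyuan.json", encoding="utf-8") as f:
  raw = f.read().strip().rstrip(',')   # drop the trailing comma left by save_content

records = json.loads('[' + raw + ']')  # wrap the objects in a JSON array
for record in records:
  print(record.get("name"), record.get("email"))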

