python 請求網頁



# -*- coding:utf-8 -*-
import urllib.request
from urllib import request, parse
import urllib
import re
import os
import urllib.request
from urllib import request, parse

# Walk-through of several ways to build and send HTTP requests with urllib,
# followed by extracting one <table> block from a Baidu results page.

url = 'http://www.baidu.com/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
# Named `params` (not `dict`) so the builtin type is not shadowed.
params = {'wd': 'word'}
# Form-encoded request body; urllib requires bytes for `data`.
data = bytes(parse.urlencode(params), encoding='utf8')

# 1. Plain GET with a custom User-Agent header.
req = request.Request(url=url, headers=headers)
with request.urlopen(req) as response:
    page = response.read()

# 2. GET with query parameters. HTTP method tokens are case-sensitive, so the
#    method must be 'GET', and GET parameters belong in the query string
#    rather than in a request body.
req = request.Request(url=url + '?' + parse.urlencode(params),
                      headers=headers, method='GET')
response = request.urlopen(req)
response.close()  # the body is not used here; release the connection

# 3. Building (not sending) a POST request and adding a header afterwards.
#    The URL needs an explicit scheme: Request() raises
#    "ValueError: unknown url type" for a bare host name.
req = request.Request(url='http://www.baidu.com/', data=data, method='POST')
req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')

# 4. Search request with a percent-encoded keyword (empty in this example).
url = 'http://www.baidu.com/s?wd=' + parse.quote('')
req = request.Request(url)
with request.urlopen(req) as response:
    html = response.read()

# Non-greedy match of the first 30%-wide table; re.S lets '.' span newlines.
p = re.compile(r'<table width="30%".+?</table>', re.S)
# HTML_ad holds the HTML of the whole promoted-results section, or None
# when the page contains no such table.
HTML_ad = p.search(html.decode('utf-8'))
if HTML_ad is not None:
    HTML_ad = HTML_ad.group()

 

 pyquery:

# Fetch a page with pyquery and select every <div> element from it.
# The class is aliased to `pq` because that is the name called below;
# importing it only as `PyQuery` left `pq` undefined (NameError).
from pyquery import PyQuery as pq

headerss = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
# Named `params` (not `dict`) so the builtin type is not shadowed.
params = {'wd': 'word'}
# NOTE(review): the original snippet never defined `urls`; the Baidu search
# endpoint is assumed here because the `wd` parameter is its search keyword.
# Replace with the page you actually want to fetch.
urls = 'http://www.baidu.com/s'
# `data` and `headers` are forwarded to pyquery's URL opener.
d = pq(url=urls, data=params, headers=headerss)
# Selection of all <div> elements in the fetched document.
p = d('div')

 

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM