python 请求网页



# -*- coding:utf-8 -*-
import urllib.request
from urllib import request, parse
import os
import re
import urllib

# Demo script: several ways of building a urllib request against Baidu,
# followed by extracting the promoted-results table from a search page.
# All network access lives in main(), so importing this module is side-effect
# free and the pure helpers below can be exercised offline.

BASE_URL = 'http://www.baidu.com/'
SEARCH_URL = 'http://www.baidu.com/s'
BROWSER_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0',
}
LEGACY_USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
QUERY_PARAMS = {'wd': 'word'}
# Upper bound for each HTTP call so a stalled connection cannot hang the script.
TIMEOUT_SECONDS = 10

# Matches the whole promotion block (a 30%-wide table); re.S lets "." span
# newlines because the table is spread over several lines of HTML.
AD_TABLE_RE = re.compile(r'<table width="30%".+?</table>', re.S)


def build_get_request(url, params=None, headers=None):
    """Return a GET ``Request`` for *url*.

    Args:
        url: Absolute URL including the scheme (``http://...``).
        params: Optional mapping that is URL-encoded and appended to the
            query string. A GET request carries its parameters in the URL,
            not in the request body.
        headers: Optional mapping of HTTP headers.

    Raises:
        ValueError: If *url* has no scheme.
    """
    if params:
        separator = '&' if '?' in url else '?'
        url = url + separator + parse.urlencode(params)
    # HTTP method names are case-sensitive, so it must be 'GET', not 'Get'.
    return request.Request(url=url, headers=headers or {}, method='GET')


def build_post_request(url, params, headers=None):
    """Return a POST ``Request`` whose body is *params*, form-encoded as UTF-8.

    Args:
        url: Absolute URL including the scheme (``http://...``).
        params: Mapping of form fields to send in the request body.
        headers: Optional mapping of HTTP headers.

    Raises:
        ValueError: If *url* has no scheme.
    """
    body = parse.urlencode(params).encode('utf8')
    return request.Request(url=url, data=body, headers=headers or {}, method='POST')


def build_search_url(keyword):
    """Return the Baidu search URL for *keyword*, percent-encoding the keyword."""
    return SEARCH_URL + '?wd=' + parse.quote(keyword)


def fetch(req):
    """Send *req* and return the raw response body as bytes.

    The response is closed before returning, so no socket is leaked.
    """
    with request.urlopen(req, timeout=TIMEOUT_SECONDS) as response:
        return response.read()


def extract_ad_html(html_text):
    """Return the HTML of the whole promotion block, or ``None`` if absent."""
    match = AD_TABLE_RE.search(html_text)
    # re.search returns None (never the string 'None') when nothing matches.
    return match.group() if match is not None else None


def main(keyword=''):
    """Run the demo requests and return the promotion-block HTML, if any.

    Args:
        keyword: Search term for the final results-page request. Defaults to
            the empty string used by the original snippet.
    """
    # 1. Plain GET that only adds a browser-like User-Agent.
    fetch(build_get_request(BASE_URL, headers=BROWSER_HEADERS))

    # 2. GET with query parameters and headers.
    fetch(build_get_request(BASE_URL, QUERY_PARAMS, BROWSER_HEADERS))

    # 3. POST request with a header attached after construction. The original
    #    snippet only builds this request and never sends it; that is kept.
    post_req = build_post_request('http://www.baidu.com', QUERY_PARAMS)
    post_req.add_header('User-Agent', LEGACY_USER_AGENT)

    # 4. Fetch a search results page and pull out the promotion block.
    html = fetch(request.Request(build_search_url(keyword)))
    return extract_ad_html(html.decode('utf-8'))


if __name__ == '__main__':
    HTML_ad = main()

 

 pyquery:

from pyquery import PyQuery
# Fetch a page with pyquery and select all of its <div> elements.

# Request headers carrying a desktop Chrome User-Agent string.
headerss = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
}
# Query parameters; named ``params`` so the builtin ``dict`` is not shadowed.
params = {'wd': 'word'}
# NOTE(review): ``urls`` was never defined in the original snippet; the Baidu
# search endpoint is assumed here -- confirm the intended target.
urls = 'http://www.baidu.com/s'
# The class is imported as ``PyQuery`` above; there is no ``pq`` alias in
# scope, so the original ``pq(...)`` call raised NameError.
d = PyQuery(url=urls, data=params, headers=headerss)
# Select every <div> element in the fetched document.
p = d('div')

 

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM