Python 爬取網頁數據方法

本文轉載自：查看原文 ｜ 2019-11-04 20:40 ｜ 325 ｜ python

import urllib.request

# Example 1 (kept disabled, as in the original): the most basic request --
# the address carries no parameters.
# response=urllib.request.urlopen("https://www.scetc.edu.cn")
#
# html=response.read().decode("utf-8")
#
# print(html)

# Example 2 (kept disabled, as in the original): passing parameters.
# The raw key/value data has to be converted with urllib.parse.urlencode
# and then encoded to bytes before it can be handed to urlopen as ``data``.
# Enabling this example also requires ``import urllib.parse``.
# key_value={'kw' : '胡歌'}
# data=bytes(urllib.parse.urlencode(key_value).encode('utf-8'))
#
# response=urllib.request.urlopen("http://tieba.baidu.com/f?",data=data)
#
# html=response.read().decode('utf-8')
# print(html)

# Example 3: request with a timeout.
# ``timeout`` is how long to wait for a response before giving up.
# The ``with`` block closes the connection once the body has been read.
with urllib.request.urlopen("http://www.scetc.cn", timeout=5) as response:
    html = response.read().decode('utf-8')
print(html)

import urllib.request

"""
HttpResponse對象的三個參數屬性
"""

# Fetch the page; the ``with`` block closes the connection when we are done.
# NOTE: geturl()/getcode()/info() are documented as deprecated since
# Python 3.9 in favour of the .url/.status/.headers attributes; the methods
# are kept here because they are the subject of this example.
with urllib.request.urlopen("https://www.tmall.com") as response:
    # URL of the resource that was actually retrieved.
    back_url = response.geturl()
    print("響應的url:", back_url)

    # HTTP status code of the response.
    back_code = response.getcode()
    print("響應的狀態碼：", back_code)

    # Meta-information of the response (the headers).
    back_info = response.info()
    print("響應的信息：", back_info)

"""
構造Request對象
"""

import urllib.request
import urllib.parse

# Request header data (a browser-style User-Agent).
header = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT6.1; Trident/5.0)"}
# Parameters to send with the request.
params = {"news_id": 174, "page": 1}
# urlencode() returns str and .encode() already yields bytes, so no extra
# bytes() wrapper is needed.
data = urllib.parse.urlencode(params).encode('utf-8')
# Wrap everything in a Request object.
# Target address.
url = "http://www.scetc.cn/index!detail"
request = urllib.request.Request(url, data=data, headers=header)
# Connection type.
request.add_header("Connection", "keep-alive")

# Once the Request is assembled, urlopen() only needs that single object.
# The ``with`` block closes the connection after the body has been read.
with urllib.request.urlopen(request) as response:
    html = response.read().decode('utf-8')

print(html)

import random
import urllib.request

# Candidate proxy servers, one mapping of scheme -> "host:port" each.
# NOTE(review): 127.88.67.81 lies in the loopback range and looks like a
# typo for a 124.x address -- left unchanged, confirm against the source list.
proxy_list = [
    {"http": "124.88.67.81:80"},
    {"http": "127.88.67.81:80"},
    {"http": "121.82.67.81:80"},
    {"http": "124.55.67.81:80"},
    {"http": "124.56.67.81:80"},
    {"http": "124.78.67.81:80"},
]
# Pick one proxy server at random.
ran_proxy = random.choice(proxy_list)
# Create the handler object for that proxy.
httpproxy_handler = urllib.request.ProxyHandler(ran_proxy)
# Build an opener that uses the handler.
opener = urllib.request.build_opener(httpproxy_handler)

# Build the Request object.
header = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT6.1; Trident/5.0)"}
request = urllib.request.Request('http://www.scetc.net', headers=header)
# Send the request through the opener; the ``with`` block closes the
# connection after the body has been read.
with opener.open(request) as response:
    # Read and decode the response body.
    html = response.read().decode('utf-8')
print(html)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱 yoyou2525@163.com 刪除。

猜您在找：python爬取網頁數據 ／ Python：將爬取的網頁數據寫入Excel文件中 ／ C# 爬取網頁數據 ／ curl ——爬取網頁數據 ／ Python 爬蟲爬取多頁數據 ／ python爬蟲教程：實例講解Python爬取網頁數據 ／ python3下scrapy爬蟲（第八卷：循環爬取網頁多頁數據） ／ Asp .Net Core網頁數據爬取筆記 ／ 如何使用python爬取網頁動態數據 ／ 使用webdriver+urllib爬取網頁數據（模擬登陸，過驗證碼）