urllib.request 基本用法
(1)基本用法
# Basic usage: fetch a page, then convert between bytes and str.
url = "http://www.baidu.com/"
# urlopen() returns a response object that works as a context manager; the
# `with` block closes the underlying connection even if read() raises.
with urllib.request.urlopen(url) as response:
    # read() returns the raw body as bytes
    data = response.read()
# bytes -> str
str_data = data.decode("utf-8")
# str -> bytes
str_name = "baidu"
bytes_name = str_name.encode("utf-8")
(2)url參數中含有漢字時需要轉譯(URL 編碼)
# A URL may only contain ASCII, so a query value containing Chinese
# characters has to be percent-encoded before the request is sent.
url = "http://www.baidu.com/s?wd="
name = "python中含有漢字"
# Raw (un-encoded) URL, kept for reference only; it is not sent as-is.
final_url = url + name
# Percent-encode only the query value. Encoding the value rather than the
# whole URL keeps the scheme/host/"?"/"=" delimiters literal while still
# escaping spaces, "&" and "#" that may appear inside the value, and it
# needs nothing from the `string` module.
encode_new_url = url + urllib.parse.quote(name)
# Send the request; the `with` block closes the connection when done.
with urllib.request.urlopen(encode_new_url) as response:
    print(response)
    # Read the body and decode it as UTF-8
    data = response.read().decode("utf-8")
# Save the page locally
with open("02-encode.html", "w", encoding="utf-8") as f:
    f.write(data)
(3)傳入字典類型的參數
# Build the query string from a dict of parameters.
url = "http://www.baidu.com/s?"
params = {
    "wd": "中文",
    "key": "zhang",
    "value": "san",
}
# urlencode() joins the pairs as key=value&... and percent-encodes every
# key and value (non-ASCII text is encoded as UTF-8).
str_params = urllib.parse.urlencode(params)
final_url = url + str_params
# The urlencode() output is already pure ASCII, so no second quote() pass is
# needed; end_url is kept as the name of the URL that is actually requested.
end_url = final_url
with urllib.request.urlopen(end_url) as response:
    data = response.read().decode("utf-8")
print(data)
(4)添加header
第一種添加header的方式
# Option 1: hand every header to the Request when it is constructed.
url = "https://www.baidu.com"
# Collect the request headers one entry at a time.
headers = {}
# Browser identification string
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
headers['name'] = 'chen'
# Build the request object carrying those headers (nothing is sent yet).
request = urllib.request.Request(url=url, headers=headers)
第二種添加header的方式:動態添加
# Option 2: create the Request first, then attach headers dynamically.
url = "https://www.baidu.com"
# Create the request object
request = urllib.request.Request(url)
# Add a header after construction
request.add_header(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
)
# Send the request; the `with` block closes the connection once the body
# has been read.
with urllib.request.urlopen(request) as response:
    data = response.read().decode("utf-8")
# The full URL the request was built for
final_url = request.get_full_url()
print(final_url)
with open("baidu.html", "w", encoding="utf-8") as f:
    f.write(data)
# Inspect the headers stored on the request
request_headers = request.headers
print(request_headers)
# Request stores header names via str.capitalize(), so the lookup key is
# 'User-agent', not 'User-Agent'.
user_agent = request.get_header('User-agent')
print(user_agent)
# Response headers (still available on the object after it is closed)
print(response.headers)
(5)使用代理
# Send a request through a proxy using a custom opener.
url = 'https://www.cnblogs.com/chenshy'
# ProxyHandler picks the proxy by the scheme of the requested URL. The target
# here is https, so an 'https' entry is required; with only an 'http' entry
# the request would silently bypass the proxy.
# NOTE(review): assumes this proxy accepts HTTPS (CONNECT) traffic; confirm.
proxy = {
    'http': '119.102.25.91:9999',
    'https': '119.102.25.91:9999',
}
# Handler that routes requests through the proxy
proxy_handler = urllib.request.ProxyHandler(proxy)
# Build an opener that uses the handler
opener = urllib.request.build_opener(proxy_handler)
# Send the request via the proxy; the `with` block closes the connection.
with opener.open(url) as response:
    data = response.read().decode("utf-8")
print(data)
(6) cookie
a.在頭部添加cookie
# Request a login-protected page by sending a Cookie header copied from a
# logged-in browser session.
url = 'https://www.yaozh.com/member/'
# NOTE(review): the Cookie value below is a captured session; it should not
# live in source control and will stop working once the session expires.
headers = {
    # The header name must be 'User-Agent' (hyphen). With an underscore it is
    # just an unknown custom header and urllib still sends its default UA.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'Cookie': 'acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; PHPSESSID=t4rb1af4vmks8gete5oqfd6ub7; _ga=GA1.2.521923122.1555078606; _gid=GA1.2.523976398.1555078606; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1555078606; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv; yaozh_logintime=1555078687; yaozh_user=729821%09lifelover; yaozh_userId=729821; _gat=1; Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1555078691; yaozh_uidhas=1; yaozh_mylogin=1555078693; acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv',
}
request = urllib.request.Request(url, headers=headers)
# The `with` block closes the connection once the body has been read.
with urllib.request.urlopen(request) as response:
    data = response.read().decode('utf-8')
print(data)
b.登錄之後獲取cookie,cookiejar的使用
import urllib.request
from http import cookiejar
from urllib import parse
def login():
    """Log in to yaozh.com, then save the member-centre page to test.html.

    A CookieJar-backed opener sends the login POST so that any cookies set by
    the server are stored automatically; the same opener is then reused to
    request the member page, and the decoded body is written to ``test.html``
    in the current directory.

    Returns:
        None.

    Raises:
        urllib.error.URLError: if either request fails.
        UnicodeDecodeError: if the member page is not valid UTF-8.
    """
    # 1. Log in from code, collect the cookies, then request the member page.
    url = 'https://www.yaozh.com/login'
    # Login form fields.
    # NOTE(review): credentials are hard-coded in source; move them to an
    # environment variable or config file before sharing this code.
    # NOTE(review): 'formhash' looks like a per-session token captured from
    # the login page; confirm whether it must be fetched fresh each time.
    login_form_data = {
        'username': 'lifelover',
        'pwd': 'chen19960319',
        'formhash': 'F456373F7B',
        # Pass the plain URL: urlencode() below does the percent-encoding.
        # An already-encoded value would be encoded a second time.
        'backurl': 'https://www.yaozh.com/',
    }
    # Opener whose cookie processor stores response cookies in the jar and
    # sends them back on later requests.
    cook_jar = cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cook_jar)
    opener = urllib.request.build_opener(cookie_handler)
    # The header name must be 'User-Agent' (hyphen); with an underscore
    # urllib would still send its default User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    }
    # POST data must be (1) form-encoded and (2) bytes.
    login_str = urllib.parse.urlencode(login_form_data).encode('utf-8')
    # Supplying `data` makes this a POST request.
    request = urllib.request.Request(url, headers=headers, data=login_str)
    # On a successful login the cookie jar picks up the session cookies.
    # The body is not needed; just make sure the connection is closed.
    with opener.open(request):
        pass
    # Request the member centre with the cookies collected above.
    center = 'https://www.yaozh.com/member/'
    center_request = urllib.request.Request(center, headers=headers)
    with opener.open(center_request) as response:
        data = response.read().decode('utf-8')
    with open('test.html', 'w', encoding='utf-8') as f:
        f.write(data)