import requests
import pickle
from bs4 import BeautifulSoup
def get_cookie_from_net():
    """Log in by submitting the login form and return the session cookies.

    Relies on the module-level ``s`` (a requests session) and ``headers``
    globals. When the server reports a successful login, the session
    cookies are converted to a plain dict and pickled to ``cookies.douban``
    so that later runs can reuse them. When the login is not reported as
    successful, nothing is written to disk.

    Returns:
        The cookie jar of the shared session ``s``. It is returned whether
        or not the login succeeded, so the caller's assignment keeps
        working either way.
    """
    url = "https://accounts.douban.com/j/mobile/login/basic"
    # Build the login form. 'name' and 'password' are placeholders that
    # must be replaced with real credentials.
    payload = {
        'ck': '',
        'name': '賬號',
        'password': '密碼',
        'remember': 'false',
    }
    # Author's note: the session has to issue a GET before the POST,
    # otherwise the login does not succeed. The response body is not needed.
    s.get(url, headers=headers)
    # Author's note: after too many logins the site asks for an image
    # captcha; log in through the website once to clear it.
    data = s.post(url, headers=headers, data=payload).json()
    # Treat a missing "status" key the same as a failed login instead of
    # raising KeyError.
    if data.get("status") != "success":
        print("登陸失敗,未保存cookies!")
        return s.cookies
    print("登陸成功!")
    # Persist the cookies as a plain dict; only done for a successful login
    # so an unauthenticated jar is never reloaded on the next run.
    cookiedict = requests.utils.dict_from_cookiejar(s.cookies)
    with open('cookies.douban', 'wb') as f:
        pickle.dump(cookiedict, f)
    print("成功獲取cookies!")
    return s.cookies
def get_cookie_from_file():
    """Load the cookies stored in the ``cookies.douban`` file.

    The file is expected to contain a pickled dict of cookies, which is
    turned back into a cookie jar with
    ``requests.utils.cookiejar_from_dict``.

    Returns:
        The cookie jar rebuilt from the stored dict.

    Raises:
        Whatever ``open`` or ``pickle.load`` raise when the file is missing
        or unreadable; nothing is caught here.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file, so
    # this is only safe while cookies.douban is a trusted, locally written
    # file.
    with open('cookies.douban', 'rb') as cookie_file:
        stored = pickle.load(cookie_file)
    jar = requests.utils.cookiejar_from_dict(stored)
    print("解析文件,成功提取cookis...")
    return jar
def getdata(html):
    """Return the text of the page's ``#footer`` element.

    Args:
        html: A response-like object whose ``text`` attribute holds the
            page markup.

    Returns:
        The text content of the first element matching the CSS selector
        ``#footer``.

    Raises:
        IndexError: If no element matches ``#footer``.
    """
    document = BeautifulSoup(html.text, 'lxml')
    # '#footer' is an id selector (a class selector would be '.footer');
    # it targets the block at the bottom of the page.
    matches = document.select('#footer')
    footer = matches[0]
    # Extraction and storage of any other post-login data would go here;
    # for now only this single element is read.
    return footer.get_text()
def login_and_getdata(profile_url='https://www.douban.com/people/153003252/'):
    """Obtain login cookies, fetch a page with them and print the scraped data.

    Cookies are first loaded from the cookie file; if that fails for any
    ordinary reason, a form login is attempted instead. The resulting
    cookies are installed on the module-level session ``s``, which is then
    used (with the module-level ``headers``) to request ``profile_url``.

    Args:
        profile_url: URL of the page to fetch once the cookies are set.
            Defaults to the profile page that was previously hard-coded.
    """
    print('獲取cookis...')
    try:
        s.cookies = get_cookie_from_file()
    except Exception:
        # Deliberately broad: a missing, truncated or corrupt cookie file
        # should all fall back to a fresh login. Unlike a bare ``except``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        print("從文件獲取cookies失敗...\n正在嘗試提交表單登錄以獲取...")
        s.cookies = get_cookie_from_net()
    html = s.get(profile_url, headers=headers)
    data = getdata(html)
    print(data)
if __name__ == '__main__':
    # Globals shared by the functions above: the HTTP session that carries
    # the cookies, and the request headers sent with every call.
    s = requests.Session()
    # Be sure to replace this User-Agent with your own.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    }
    # Log in and fetch the data.
    login_and_getdata()
