A crawler needs to save the results it fetches from web pages; here we start by learning how to save data with csv.
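Before wiring it into the crawler, here is a minimal sketch of the csv module on its own. The file name rows.csv and the sample rows are placeholders for illustration, not part of the scraper:

import csv

# Placeholder rows for illustration; each inner list becomes one line in the CSV file.
rows = [
    ["https://example.com/post/1", "First post"],
    ["https://example.com/post/2", "Second post"],
]

# newline='' avoids blank lines on Windows; utf-8 keeps Chinese titles intact.
with open("rows.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(rows)  # write every row in one call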
Final implementation code:
import requests
from bs4 import BeautifulSoup
import csv


# Fetch the Douban groups page and return the raw HTML.
def db():
    url = "https://www.douban.com/group/"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Cookie": ''
    }
    ret = requests.get(url, headers=headers)
    return ret.content


# Parse the page and collect each post's url and title.
def get_data(lst, html_data):
    soup = BeautifulSoup(html_data, "html.parser")
    for i in soup.find_all("a", class_="title"):
        lst.append([i.attrs["href"], i.attrs["title"]])


# Save the urls and titles to a CSV file.
def save_to_csv(lst):
    with open('test.csv', 'w', newline='', encoding='utf-8') as f:
        f_csv = csv.writer(f)
        for data in lst:
            f_csv.writerow(data)


def main():
    html = db()
    lst = []
    get_data(lst, html)
    save_to_csv(lst)


main()
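As a quick check, a small sketch (assuming the script above ran successfully and produced test.csv in the current directory) reads the file back with csv.reader and prints each saved pair:

import csv

# Read test.csv back and print each saved [url, title] pair.
with open("test.csv", "r", newline="", encoding="utf-8") as f:
    for row in csv.reader(f):
        print(row)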