# coding=utf-8
import re
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Captures the text between the first <title ...> and </title> pair.
# IGNORECASE accepts <TITLE>; DOTALL lets the title span several lines;
# [^>]* tolerates attributes on the opening tag.
_TITLE_RE = re.compile(r'<title\b[^>]*>(.*?)</title>', re.IGNORECASE | re.DOTALL)


def get_url_Title_Description(url):
    """Fetch a web page and return its title and meta description.

    Args:
        url: Address of the page to download; passed straight to
            ``urllib.request.urlopen``.

    Returns:
        A ``(title, description)`` tuple.  ``title`` is the raw text found
        between the first ``<title>`` and ``</title>`` tags (not stripped,
        entities not decoded).  ``description`` is the ``content`` attribute
        of the first element whose ``name`` attribute is ``"description"``.
        Either element is ``None`` when the page does not provide it.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Use the response as a context manager so the connection is always
    # closed, even if reading fails part-way through.
    with urlopen(url) as response:
        raw = response.read()

    # NOTE: the body is always decoded as UTF-8, regardless of any charset
    # the server declares.
    content = raw.decode('utf-8')

    # Extract the title with a regular expression; a page without a
    # <title> element yields None instead of raising IndexError.
    match = _TITLE_RE.search(content)
    title = match.group(1) if match else None

    # Extract the page summary from the description meta tag.  Both the
    # tag and its "content" attribute are optional in real-world HTML.
    soup = BeautifulSoup(content, "html.parser")
    meta = soup.find(attrs={"name": "description"})
    description = meta.get('content') if meta is not None else None

    return (title, description)


# ---------- usage example ----------
# url = "http://www.sina.com.cn/"
# title, dsp = get_url_Title_Description(url)
# print(title)
# print(dsp)