# coding=utf-8
"""Fetch a web page and extract its title and meta description."""
import re
from urllib.request import urlopen

from bs4 import BeautifulSoup

# First <title> element: tolerate upper-case tags, attributes on the tag and
# titles that span several lines.
_TITLE_RE = re.compile(r"<title\b[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)

# Pages spell the meta name as "description", "Description", "DESCRIPTION", ...
_DESCRIPTION_NAME_RE = re.compile(r"^description$", re.IGNORECASE)


def get_url_Title_Description(url, timeout=None):
    """Return the title and meta description of the page at *url*.

    Args:
        url: Address of the page to download (anything ``urlopen`` accepts).
        timeout: Optional timeout in seconds for the request. When omitted,
            ``urlopen`` is called without a timeout argument, exactly as
            before this parameter existed.

    Returns:
        A ``(title, description)`` tuple of strings. ``title`` is the raw
        text between the first ``<title>`` tags (entities and whitespace are
        left untouched); ``description`` is the ``content`` attribute of the
        first ``<meta name="description">`` tag. Either element is ``""``
        when the page does not provide it.

    Raises:
        urllib.error.URLError: If the page cannot be fetched (``HTTPError``
            for HTTP error statuses is a subclass).
    """
    open_kwargs = {} if timeout is None else {"timeout": timeout}

    # The context manager closes the connection even if reading fails.
    with urlopen(url, **open_kwargs) as response:
        raw = response.read()
        # Prefer the charset announced in the Content-Type header.
        charset = response.headers.get_content_charset() or "utf-8"

    try:
        # errors="replace": one bad byte should not abort the extraction.
        content = raw.decode(charset, errors="replace")
    except LookupError:
        # The server announced a charset name Python does not know.
        content = raw.decode("utf-8", errors="replace")

    # Title: regular-expression match on the decoded markup.
    title_match = _TITLE_RE.search(content)
    title = title_match.group(1) if title_match else ""

    # Description: look only at <meta> tags so that an unrelated element
    # such as <input name="description"> is never picked up by mistake.
    soup = BeautifulSoup(content, "html.parser")
    meta = soup.find("meta", attrs={"name": _DESCRIPTION_NAME_RE})
    description = meta.get("content", "") if meta is not None else ""

    return (title, description)


# ---------- usage example ----------
# url = "http://www.sina.com.cn/"
# title, dsp = get_url_Title_Description(url)
# print(title)
# print(dsp)