#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Crawler: fetch the Baidu hot-search ranking and write it to an Excel file.
import json
import os
import urllib.request

import xlwt

# Fetch the hot-word API response; rn is the number of entries (only 1-50 allowed).
cnt = 50
url = 'https://zhidao.baidu.com/question/api/hotword?rn={}&t=1535421904906'.format(cnt)
print(url)

user_agent = ('Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) '
              'Gecko/20091201 Firefox/3.5.6')
req = urllib.request.Request(url, headers={'User-Agent': user_agent})
response = urllib.request.urlopen(req)
content = response.read().decode('utf-8')
#print(content)

# Build the workbook and write the header row.
workbook = xlwt.Workbook()
sheet1 = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
sheet1.write(0, 0, '排名')      # rank
sheet1.write(0, 1, '新聞名稱')  # keyword
sheet1.write(0, 2, '搜索人數')  # number of searches
sheet1.write(0, 3, '變化數量')  # change rate
sheet1.write(0, 4, '新的新聞')  # whether the entry is new
sheet1.write(0, 5, '熱度上升')  # trend

dataList = json.loads(content)['data']
for j, data in enumerate(dataList, start=1):
    print(data)
    sheet1.write(j, 0, j)
    sheet1.write(j, 1, data['keyword'])
    sheet1.write(j, 2, data['searches'])
    sheet1.write(j, 3, data['changeRate'])

    # isNew: 0 -> '否' (no), 1 -> '是' (yes)
    isNew = data['isNew']
    if isNew == 0:
        isNew = '否'
    elif isNew == 1:
        isNew = '是'
    sheet1.write(j, 4, str(isNew))

    # Colour the trend cell: colour_index 3 (green) for 'fall', 2 (red) for 'rise'.
    trend = data['trend']
    style5 = xlwt.XFStyle()
    font = xlwt.Font()
    style5.font = font
    if trend == 'fall':
        font.colour_index = 3
        trend = '下降'  # falling
    elif trend == 'rise':
        font.colour_index = 2
        trend = '上升'  # rising
    sheet1.write(j, 5, trend, style5)

# Save the Excel file; an existing file with the same name is overwritten.
path = 'D:\\Python'
if not os.path.isdir(path):
    os.makedirs(path)
filename = 'test1'
workbook.save(os.path.join(path, filename + '.xls'))
print('創建excel文件完成!')  # Excel file created
Whenever you search on Baidu you see the hot-search ranking; the code above is a crawler that fetches that ranking.
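The script relies on the hotword endpoint still returning a data list whose items carry the keyword, searches, changeRate, isNew and trend fields. Before wiring the response into xlwt, a quick check that the response still has that shape can save debugging time. A minimal sketch, assuming the same endpoint and User-Agent handling as above (the API may have changed or been retired since this was written):

import json
import urllib.error
import urllib.request

url = 'https://zhidao.baidu.com/question/api/hotword?rn=5&t=1535421904906'
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
try:
    with urllib.request.urlopen(req, timeout=10) as resp:
        payload = json.loads(resp.read().decode('utf-8'))
except (urllib.error.URLError, ValueError) as exc:
    # Network failure or a non-JSON response (e.g. the endpoint no longer exists).
    print('request failed:', exc)
else:
    # Print only the fields the Excel writer depends on.
    for item in payload.get('data', []):
        print(item.get('keyword'), item.get('searches'), item.get('trend'))

If the printed rows look right, the full script above can be run unchanged; if a key is missing, only the corresponding sheet1.write call needs adjusting.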