A simple script that scrapes page data and generates a word cloud and a network graph.
Requires the networkx and wordcloud packages to be installed (the code below also uses requests, jieba, and matplotlib).
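If they are not installed yet, the packages imported in the code below can typically be installed with pip:

pip install networkx wordcloud jieba requests matplotlib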
The code is as follows:

# @Author :whyCai
# @Time   :2020/10/17 10:35
from time import sleep

import requests
import json
import jieba
import jieba.posseg as jp
import wordcloud
import networkx as nx
from matplotlib import pyplot as plt

# Each of the three sections below is wrapped in a triple-quoted string and is
# therefore disabled; remove the surrounding quotes of a section to run it.

"""
# Scrape the page data
def getCommText():
    '''
    Scrape the page data
    :return:
    '''
    text = ''
    url = 'https://xxxxxxx'
    headers = {'content-type': 'application/json'}

    for i in range(0, 300):
        data = {"pageIndex": i + 1, "xxxx": 1}
        r = requests.post(url, data=json.dumps(data), headers=headers)
        res = json.loads(r.text)
        # Read the relevant fields from the API response
        resContent = res['result']['items']
        lenComm = len(resContent)
        for j in range(0, lenComm):
            # text = text + resContent[j]['content'] + ' '
            print(resContent[j]['content'])
        sleep(0.2)
    # print(text)
    # return text
getCommText()
"""

"""
# Generate the word cloud

# Read the data
f = open('xxx.txt', encoding='utf-8')
text = f.read()
txtlist = jieba.lcut(text)
txtlist = " ".join(txtlist)
w = wordcloud.WordCloud(width=1000, height=700, background_color='white', font_path='msyh.ttc')
w.generate(txtlist)
# Save the word cloud image
w.to_file('output2-poem.png')
"""

"""
# Generate the network graph

# text is the same text = f.read() used in the word-cloud section above
words = jp.lcut(text)
G = nx.MultiDiGraph()
# Add nodes (one per part-of-speech tag)
for word in words:
    G.add_node(word.flag)
# Add edges between the tags of adjacent words
for i in range(len(words) - 1):
    G.add_edge(words[i].flag, words[i + 1].flag)
# Draw the graph
nx.draw(G, alpha=0.8, with_labels=True, node_color='lightgreen', font_size=36, node_size=999, width=2)
# Show it
plt.show()
"""
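The scraping section only prints each comment and leaves the text accumulation commented out, while the word-cloud and network-graph sections read their input from xxx.txt. A minimal sketch of that missing glue, reusing the placeholder endpoint and the response fields ('result', 'items', 'content') from the code above:

# Sketch: collect the scraped comments into xxx.txt so the later sections can
# read them. The URL and field names are placeholders copied from the code
# above and will differ for a real API.
import json
from time import sleep

import requests

url = 'https://xxxxxxx'
headers = {'content-type': 'application/json'}

with open('xxx.txt', 'w', encoding='utf-8') as f:
    for i in range(300):                        # same page loop as above
        data = {"pageIndex": i + 1, "xxxx": 1}
        r = requests.post(url, data=json.dumps(data), headers=headers)
        items = json.loads(r.text)['result']['items']
        for item in items:
            f.write(item['content'] + ' ')      # space-separate the comments
        sleep(0.2)                              # small delay between page requests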
Reference blog posts:
Word cloud: https://www.cnblogs.com/wkfvawl/p/11585986.html
Network graph: https://blog.csdn.net/your_answer/article/details/79189660