1.主題式網絡爬蟲名稱:愛奇藝電視劇熱播數據分析
2.主題式網絡爬蟲爬取的內容:愛奇藝電視劇熱播
3.設計方案概述:
實現思路:爬取網站的網頁源代碼,找到所需數據的位置並提取數據,之後進行數據可視化等操作。
主題頁面的結構特征分析
1 主題頁面的結構與特征分析
打開網頁,點擊鼠標右鍵,再點擊「檢查」,即可找到想要的數據位置。

可以看到,藍框裡面的內容就是我們所需要的數據位置。
網絡爬蟲程序設計
1.數據爬取與采集
import requests
def get_url(url):
    """Download ``url`` and return the response body as text.

    A desktop Chrome ``User-Agent`` header is sent with the request.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body, or ``None`` if the request failed
        (connection error, timeout, or an HTTP error status). A short
        message is printed in the failure case.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
    try:
        # A timeout keeps the script from hanging forever on a stalled
        # connection; requests has no default timeout.
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of saving an error page.
        response.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP errors are handled here; programming errors and
        # KeyboardInterrupt are no longer silently swallowed.
        print('產生異常')
        return None
    return response.text
def main():
    """Download the iQIYI TV-drama index page and save its HTML to ``try.txt``.

    Nothing is written when the download fails.
    """
    url = 'https://www.iqiyi.com/dianshiju/index.html'
    some = get_url(url)
    if some is None:
        # get_url() returns None (after printing a message) when the request
        # fails; writing None would raise TypeError and leave an empty file.
        return
    with open('try.txt', 'w', encoding='utf-8') as f:
        f.write(some)


main()

對網頁進行解析:
import bs4
def bs(text):
    """Parse the HTML string ``text`` and return the BeautifulSoup tree.

    The standard-library ``html.parser`` backend is used.
    """
    return bs4.BeautifulSoup(text, 'html.parser')
def main():
    """Download the popular TV-drama listing and save its text to ``test.txt``.

    The page is parsed with ``bs()`` and only the text content (markup
    stripped) is written. Nothing is written when the download fails.
    """
    # Link to the popular TV dramas listing.
    url = 'https://list.iqiyi.com/www/2/-------------4-1-1-iqiyi--.html'
    some = get_url(url)
    if some is None:
        # get_url() returns None (after printing a message) when the request
        # fails; there is nothing to parse or save in that case.
        return
    soup = bs(some)
    with open('test.txt', 'w', encoding='utf-8') as file:
        file.write(soup.text)


main()
解析內容


挑取幾個電視劇做例子
柱狀圖
"""
====================
Horizontal bar chart
====================

This example showcases a simple horizontal bar chart.
"""
import matplotlib.pyplot as plt
import numpy as np

# Reset Matplotlib's rc parameters to their built-in defaults.
plt.rcdefaults()
fig, ax = plt.subplots()

# Example data: romanized titles of five example dramas
# (Wo Shi Yu Huanshui, Mao Dong, Jiao Chang, Zhentan K9, Lie Hu).
people = ('woshiyuhuangshui', 'maodong', 'jiaochang', 'zhentanK9', 'liehu')
y_pos = np.arange(len(people))
# NOTE: bar lengths and error bars are random placeholders, not scraped data.
performance = 3 + 10 * np.random.rand(len(people))
error = np.random.rand(len(people))

ax.barh(y_pos, performance, xerr=error, align='center',
        color='green', ecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('iqiyi')  # iQIYI
plt.show()

點狀分布圖
"""
===========================
Rotating custom tick labels
===========================

Demo of custom tick-labels with user-defined rotation.
"""
import matplotlib.pyplot as plt

# NOTE: the coordinates are hard-coded sample values, not scraped data.
x = [1, 2, 3, 4]
y = [1, 4, 9, 6]
# Romanized drama titles are used in place of the Chinese names
# (Lie Hu, Mao Dong, Jiao Chang, Zhentan K9).
labels = ['liehu', 'maodong', 'jiaochang', 'zhentanK9']

plt.plot(x, y, 'ro')  # red circle markers, no connecting line
# You can specify a rotation for the tick labels in degrees or with keywords.
plt.xticks(x, labels, rotation='vertical')
# Pad margins so that markers don't get clipped by the axes
plt.margins(0.2)
# Tweak spacing to prevent clipping of tick-labels
plt.subplots_adjust(bottom=0.15)
plt.show()

折線圖
import matplotlib.pyplot as plt
import numpy as np
from numpy import ma

# NOTE: the curves are shifted copies of a sine wave used as sample data,
# not scraped values.
x = np.arange(1, 7, 0.4)
y0 = np.sin(x)
y = y0.copy() + 2.5

plt.step(x, y, label='Y')  # Y = Wo Shi Yu Huanshui

y -= 0.5
plt.step(x, y, where='mid', label='L')  # L = Lie Hu

y -= 0.5
plt.step(x, y, where='post', label='J')  # J = Jiao Chang

# Mask the points where the underlying sine is close to zero, so the last
# curve is drawn with gaps there.
y = ma.masked_where((y0 > -0.15) & (y0 < 0.15), y - 0.5)
plt.step(x, y, label='M')  # M = Mao Dong

plt.legend()
plt.xlim(0, 7)
plt.ylim(-0.5, 4)
plt.show()

將上述代碼全部合並
import bs4
import matplotlib.pyplot as plt
import numpy as np
import requests
from numpy import ma


def get_url(url):
    """Download ``url`` and return the response body as text.

    A desktop Chrome ``User-Agent`` header is sent with the request.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body, or ``None`` if the request failed
        (connection error, timeout, or an HTTP error status). A short
        message is printed in the failure case.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
    try:
        # requests has no default timeout; set one so the script cannot hang.
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of saving an error page.
        response.raise_for_status()
    except requests.RequestException:
        print('產生異常')
        return None
    return response.text


def bs(text):
    """Parse the HTML string ``text`` with ``html.parser`` and return the tree."""
    return bs4.BeautifulSoup(text, 'html.parser')


def main():
    """Download the popular TV-drama listing and save it to disk.

    The raw HTML is written to ``try.txt`` and the text content of the parsed
    page (markup stripped) to ``test.txt``. The page is fetched only once.
    Nothing is written when the download fails.
    """
    # Link to the popular TV dramas listing.
    url = 'https://list.iqiyi.com/www/2/-------------4-1-1-iqiyi--.html'
    some = get_url(url)
    if some is None:
        # get_url() already printed a message; writing None would raise.
        return
    with open('try.txt', 'w', encoding='utf-8') as f:
        f.write(some)
    soup = bs(some)
    with open('test.txt', 'w', encoding='utf-8') as file:
        file.write(soup.text)


def _plot_bar_chart():
    """Show a horizontal bar chart for five example dramas."""
    # Reset Matplotlib's rc parameters to their built-in defaults.
    plt.rcdefaults()
    fig, ax = plt.subplots()

    # A few arbitrary example dramas. Chinese labels did not render in the
    # chart, so romanized titles are used instead
    # (Wo Shi Yu Huanshui, Mao Dong, Jiao Chang, Zhentan K9, Lie Hu).
    people = ('woshiyuhuangshui', 'maodong', 'jiaochang', 'zhentanK9', 'liehu')
    y_pos = np.arange(len(people))
    # NOTE: bar lengths and error bars are random placeholders, not scraped data.
    performance = 3 + 10 * np.random.rand(len(people))
    error = np.random.rand(len(people))

    ax.barh(y_pos, performance, xerr=error, align='center',
            color='green', ecolor='black')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(people)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Performance')
    ax.set_title('iqiyi')  # iQIYI
    plt.show()


def _plot_scatter_chart():
    """Show a point chart with vertically rotated drama names as x tick labels."""
    # NOTE: the coordinates are hard-coded sample values, not scraped data.
    x = [1, 2, 3, 4]
    y = [1, 4, 9, 6]
    # Romanized titles are used in place of the Chinese names, as in the
    # bar chart.
    labels = ['liehu', 'maodong', 'jiaochang', 'zhentanK9']

    plt.plot(x, y, 'ro')  # red circle markers, no connecting line
    # You can specify a rotation for the tick labels in degrees or with keywords.
    plt.xticks(x, labels, rotation='vertical')
    # Pad margins so that markers don't get clipped by the axes
    plt.margins(0.2)
    # Tweak spacing to prevent clipping of tick-labels
    plt.subplots_adjust(bottom=0.15)
    plt.show()


def _plot_step_chart():
    """Show four step curves, one per example drama, with a legend."""
    # NOTE: the curves are shifted copies of a sine wave used as sample data,
    # not scraped values.
    x = np.arange(1, 7, 0.4)
    y0 = np.sin(x)
    y = y0.copy() + 2.5

    plt.step(x, y, label='Y')  # Y = Wo Shi Yu Huanshui

    y -= 0.5
    plt.step(x, y, where='mid', label='L')  # L = Lie Hu

    y -= 0.5
    plt.step(x, y, where='post', label='J')  # J = Jiao Chang

    # Mask the points where the underlying sine is close to zero, so the
    # last curve is drawn with gaps there.
    y = ma.masked_where((y0 > -0.15) & (y0 < 0.15), y - 0.5)
    plt.step(x, y, label='M')  # M = Mao Dong

    plt.legend()
    plt.xlim(0, 7)
    plt.ylim(-0.5, 4)
    plt.show()


# Run the scraper first, then display the three charts one after another.
main()
_plot_bar_chart()
_plot_scatter_chart()
_plot_step_chart()
結論:
數據爬取要注意細節,比如 div 標籤等等;編寫代碼時要注意大小寫、是否使用英文(半角)符號,並注意縮進。這次練習讓我對 bs4 庫、seaborn 庫和 Matplotlib gallery 的範例更加了解,也使我對 Python 的熱愛更加深了。唯一遺憾的就是學得太慢,代碼經常打錯,很多地方其實不是太懂,所以需要勤加練習啊。
