爬取愛奇藝的熱播電視劇


1.主題式網絡爬蟲名稱:愛奇藝電視劇熱播數據分析

2.主題式網絡爬蟲爬取的內容:愛奇藝電視劇熱播

3.設計方案概述:

實現思路:爬取網站網頁源代碼,找到想要的數據位置,提取數據,之後進行數據可視化等操作

 

主題頁面的結構特征分析

1 主題頁面的結構與特征分析

打開網頁,點擊鼠標右鍵,點擊「檢查」,得到想要的數據位置

 

 

可以得到藍框裡面的就是我們所需要的數據位置

 

    網絡爬蟲程序設計

1.數據爬取與採集

 
         

import requests

 
         

def get_url(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with a desktop-browser User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response body, or ``None`` when the request failed
        (connection error, timeout, invalid URL or HTTP error status).
        A notice is printed in the failure case.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of returning an error page.
        response.raise_for_status()
    except requests.RequestException:
        # Only request-related failures are handled here; programming errors
        # and KeyboardInterrupt are no longer silently swallowed.
        print('產生異常')
        return None
    return response.text

def main():
    """Fetch the iQIYI TV-drama index page and save its HTML to try.txt."""
    url = 'https://www.iqiyi.com/dianshiju/index.html'

    some = get_url(url)
    if some is None:
        # get_url has already reported the failure; writing None to the file
        # would raise TypeError, so there is nothing to save.
        return

    with open('try.txt', 'w', encoding='utf-8') as f:
        f.write(some)


main()

 

 

對網頁進行解析:

 
         

import bs4
def bs(text):
    """Parse *text* as HTML using the 'html.parser' backend.

    Returns the resulting ``bs4.BeautifulSoup`` object.
    """
    return bs4.BeautifulSoup(text, 'html.parser')
def main():
    """Fetch the hot TV-drama listing, parse it and save its text to test.txt."""
    # Listing page of popular TV dramas.
    url = 'https://list.iqiyi.com/www/2/-------------4-1-1-iqiyi--.html'

    some = get_url(url)
    if some is None:
        # get_url has already reported the failure; there is no HTML to parse.
        return

    soup = bs(some)

    # soup.text is the page content with all markup stripped.
    with open('test.txt', 'w', encoding='utf-8') as file:
        file.write(soup.text)


main()

 

解析內容

 

 

 

 

 

 

 

挑取幾個電視劇做例子

柱狀圖

"""
====================
Horizontal bar chart
====================

This example showcases a simple horizontal bar chart.
"""
import matplotlib.pyplot as plt
import numpy as np

# Start from matplotlib's default rc settings.
plt.rcdefaults()
fig, ax = plt.subplots()

# Example data.
# Romanized drama titles for: 我是余歡水, 貓冬, 教場, 偵探K9, 獵狐
people = ('woshiyuhuanshui', 'maodong', 'jiaochang', 'zhentanK9', 'liehu')
y_pos = np.arange(len(people))
# NOTE: the bar lengths and error bars are random placeholder values,
# not figures taken from the scraped page.
performance = 3 + 10 * np.random.rand(len(people))
error = np.random.rand(len(people))

ax.barh(y_pos, performance, xerr=error, align='center',
        color='green', ecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('iqiyi')  # iQIYI

plt.show()

 

 

點狀分布圖

"""
===========================
Rotating custom tick labels
===========================

Demo of custom tick-labels with user-defined rotation.
"""
import matplotlib.pyplot as plt


# Hard-coded sample points, one per drama.
x = [1, 2, 3, 4]
y = [1, 4, 9, 6]
# Romanized drama titles are used in place of the Chinese names.
labels = ['liehu', 'maodong', 'jiaochang', 'zhentanK9']

# 'ro' draws red circle markers with no connecting line.
plt.plot(x, y, 'ro')
# You can specify a rotation for the tick labels in degrees or with keywords.
plt.xticks(x, labels, rotation='vertical')
# Pad margins so that markers don't get clipped by the axes
plt.margins(0.2)
# Tweak spacing to prevent clipping of tick-labels
plt.subplots_adjust(bottom=0.15)
plt.show()

 

 折線圖

import numpy as np
from numpy import ma
import matplotlib.pyplot as plt

# Sample a sine wave and lift it so the stacked curves stay above zero.
x = np.arange(1, 7, 0.4)
y0 = np.sin(x)
y = y0 + 2.5

# One step curve per drama, each drawn 0.5 lower than the previous one.
# Legend keys: Y = 我是余歡水, L = 獵狐, J = 教場 ('pre' is plt.step's default).
for step_where, series_label in (('pre', 'Y'), ('mid', 'L'), ('post', 'J')):
    plt.step(x, y, where=step_where, label=series_label)
    y = y - 0.5

# Fourth curve (M = 貓冬): hide the samples where the sine is close to zero.
y = ma.masked_where(np.abs(y0) < 0.15, y)
plt.step(x, y, label='M')

plt.legend()

plt.xlim(0, 7)
plt.ylim(-0.5, 4)

plt.show()

 

 將上述代碼全部合併

import requests

def get_url(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with a desktop-browser User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response body, or ``None`` when the request failed
        (connection error, timeout, invalid URL or HTTP error status).
        A notice is printed in the failure case.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of returning an error page.
        response.raise_for_status()
    except requests.RequestException:
        # Only request-related failures are handled here; programming errors
        # and KeyboardInterrupt are no longer silently swallowed.
        print('產生異常')
        return None
    return response.text
        
def main():
    """Fetch the hot TV-drama listing page and save its HTML to try.txt."""
    url = 'https://list.iqiyi.com/www/2/-------------4-1-1-iqiyi--.html'

    some = get_url(url)
    if some is None:
        # get_url has already reported the failure; writing None to the file
        # would raise TypeError, so there is nothing to save.
        return

    with open('try.txt', 'w', encoding='utf-8') as f:
        f.write(some)


main()


import bs4
def bs(text):
    """Parse *text* as HTML using the 'html.parser' backend.

    Returns the resulting ``bs4.BeautifulSoup`` object.
    """
    return bs4.BeautifulSoup(text, 'html.parser')
def main():
    """Fetch the hot TV-drama listing, parse it and save its text to test.txt."""
    # Listing page of popular TV dramas.
    url = 'https://list.iqiyi.com/www/2/-------------4-1-1-iqiyi--.html'

    some = get_url(url)
    if some is None:
        # get_url has already reported the failure; there is no HTML to parse.
        return

    soup = bs(some)

    # soup.text is the page content with all markup stripped.
    with open('test.txt', 'w', encoding='utf-8') as file:
        file.write(soup.text)


main()

# Bar chart

# A few dramas picked as examples; romanized labels are used because the
# Chinese titles did not render in the figure.

"""
====================
Horizontal bar chart
====================

This example showcases a simple horizontal bar chart.
"""
import matplotlib.pyplot as plt
import numpy as np

# Start from matplotlib's default rc settings.
plt.rcdefaults()
fig, ax = plt.subplots()

# Example data.
# Romanized drama titles for: 我是余歡水, 貓冬, 教場, 偵探K9, 獵狐
people = ('woshiyuhuanshui', 'maodong', 'jiaochang', 'zhentanK9', 'liehu')
y_pos = np.arange(len(people))
# NOTE: the bar lengths and error bars are random placeholder values,
# not figures taken from the scraped page.
performance = 3 + 10 * np.random.rand(len(people))
error = np.random.rand(len(people))

ax.barh(y_pos, performance, xerr=error, align='center',
        color='green', ecolor='black')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Performance')
ax.set_title('iqiyi')  # iQIYI

plt.show()


"""
===========================
Rotating custom tick labels
===========================

Demo of custom tick-labels with user-defined rotation.
"""
import matplotlib.pyplot as plt


# Hard-coded sample points, one per drama.
x = [1, 2, 3, 4]
y = [1, 4, 9, 6]
# Romanized drama titles are used in place of the Chinese names.
labels = ['liehu', 'maodong', 'jiaochang', 'zhentanK9']

# 'ro' draws red circle markers with no connecting line.
plt.plot(x, y, 'ro')
# You can specify a rotation for the tick labels in degrees or with keywords.
plt.xticks(x, labels, rotation='vertical')
# Pad margins so that markers don't get clipped by the axes
plt.margins(0.2)
# Tweak spacing to prevent clipping of tick-labels
plt.subplots_adjust(bottom=0.15)
plt.show()

import numpy as np
from numpy import ma
import matplotlib.pyplot as plt

# Sample a sine wave and lift it so the stacked curves stay above zero.
x = np.arange(1, 7, 0.4)
y0 = np.sin(x)
y = y0 + 2.5

# One step curve per drama, each drawn 0.5 lower than the previous one.
# Legend keys: Y = 我是余歡水, L = 獵狐, J = 教場 ('pre' is plt.step's default).
for step_where, series_label in (('pre', 'Y'), ('mid', 'L'), ('post', 'J')):
    plt.step(x, y, where=step_where, label=series_label)
    y = y - 0.5

# Fourth curve (M = 貓冬): hide the samples where the sine is close to zero.
y = ma.masked_where(np.abs(y0) < 0.15, y)
plt.step(x, y, label='M')

plt.legend()

plt.xlim(0, 7)
plt.ylim(-0.5, 4)

plt.show()

結論:

數據爬取要注意細節,比如 div 標籤等等;編寫代碼時要注意大小寫、是否為英文字符,並注意縮進。這次練習讓我對 bs4 庫、seaborn 庫和 gallery 庫更加了解,也使我對 Python 的熱愛更加深了。唯一遺憾的就是學得太慢,代碼經常打錯,很多地方其實不是太懂,所以需要勤加練習啊。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM