Python爬蟲可視化之網易雲音樂歌單


一、選題背景

由於音樂版權的限制,許多歌曲分散在不同平台的音樂播放器上,使不少人難以找到想聽的音樂資源。因此,本項目希望幫助使用網易雲音樂的伙伴們更好地找到各平台的資源,聽到更多自己喜歡的歌。

 

二、網絡爬蟲設計方案

網絡爬蟲名稱:“網易雲音樂歌單”

內容與數據分析特征:該爬蟲主要獲取網易雲音樂歌單頁面的數據(歌單標題、標簽、播放量、收藏量及歌曲名稱),並進行可視化分析。

三、主題頁面的結構特征分析

全部歌單 - 歌單 - 網易雲音樂 (163.com)

 

 

 

 

四、網絡爬蟲程序設計

1.數據爬取與采集

from bs4 import BeautifulSoup
import requests
import time

import csv

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

# Walk the playlist index pages: offsets 0, 35, ..., 1295 with limit=35 per page.
for i in range(0, 1330, 35):
    print(i)
    # Pause between consecutive requests.
    time.sleep(2)
    url = 'https://music.163.com/discover/playlist/?cat=歐美&order=hot&limit=35&offset=' + str(i)
    response = requests.get(url=url, headers=headers)
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')
    # Anchors that carry each playlist's detail-page href and title.
    ids = soup.select('.dec a')
    # List items that carry each playlist's play count and creator.
    lis = soup.select('#m-pl-container li')
    print(len(lis))
    # Open the output once per page in append mode so each finished page is
    # persisted. newline='' hands line-ending control to the csv module.
    with open('playlist.csv', 'a', encoding='utf-8-sig', newline='') as f:
        # csv.writer quotes fields containing commas or quotes, so a title
        # with a comma no longer produces a malformed row.
        writer = csv.writer(f)
        # zip stops at the shorter list, so a length mismatch between the two
        # selections cannot raise IndexError.
        for link, item in zip(ids, lis):
            # Relative address of the playlist detail page.
            detail_url = link['href']
            # Playlist title.
            title = link['title']
            # Playlist play count (text of the first '.nb' element).
            play = item.select('.nb')[0].get_text()
            # Creator name: first anchor inside the item's second <p>.
            user = item.select('p')[1].select('a')[0].get_text()
            # Echo the scraped row.
            print(detail_url, title, play, user)
            # Append the row to the CSV file.
            writer.writerow([detail_url, title, play, user])

 

 

 

from bs4 import BeautifulSoup
import pandas as pd
import requests
import time

# Load the index rows written by the index crawler, skipping malformed lines.
# `on_bad_lines` is the current keyword; `error_bad_lines` was deprecated in
# pandas 1.3 and later removed, so fall back to it only when the installed
# pandas rejects the new spelling.
try:
    df = pd.read_csv('playlist.csv', header=None, on_bad_lines='skip', names=['url', 'title', 'play', 'user'])
except TypeError:
    df = pd.read_csv('playlist.csv', header=None, error_bad_lines=False, names=['url', 'title', 'play', 'user'])

# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}

for i in df['url']:
    # Pause between consecutive requests.
    time.sleep(2)
    url = 'https://music.163.com' + i
    response = requests.get(url=url, headers=headers)
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')
    # Playlist title; commas are removed because rows are comma-joined below.
    title = soup.select('h2')[0].get_text().replace(',', '')
    # Playlist tags joined with '-'. join() also covers the single-tag case
    # and yields '' when the page has no tags (indexing tags[0] would raise).
    tags = [p.get_text() for p in soup.select('.u-tag i')]
    tag = '-'.join(tags)
    # Playlist description, flattened to one line with commas removed;
    # empty when the element is absent.
    description = soup.select('#album-desc-more')
    if description:
        text = description[0].get_text().replace('\n', '').replace(',', '')
    else:
        text = ''
    # Collection count: text of the second <i> under #content-operation with
    # the surrounding parentheses removed.
    collection = soup.select('#content-operation i')[1].get_text().replace('(', '').replace(')', '')
    # Play count.
    play = soup.select('.s-fc6')[0].get_text()
    # Number of songs in the playlist.
    songs = soup.select('#playlist-track-count')[0].get_text()
    # Number of comments on the playlist.
    comments = soup.select('#cnt_comment_count')[0].get_text()
    # Echo the scraped detail row.
    print(title, tag, text, collection, play, songs, comments)
    # Append the detail row to the CSV file.
    with open('music_message.csv', 'a', encoding='utf-8-sig') as f:
        f.write(title + ',' + tag + ',' + text + ',' + collection + ',' + play + ',' + songs + ',' + comments + '\n')
    # Append every song name of this playlist, one per line; the file is
    # opened once per playlist rather than once per song.
    li = soup.select('.f-hide li a')
    with open('music_name.csv', 'a', encoding='utf-8-sig') as f:
        for j in li:
            f.write(j.get_text() + '\n')

 

 

 

2.數據可視化

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the playlist detail table; it has no header row, so columns are
# addressed by position.
df = pd.read_csv('music_message_4.csv', header=None)
# Natural log of every value in column 4, stored under the 'collection' key.
df['collection'] = [np.log(value) for value in df[4]]

# Global font and minus-sign settings for the figure.
plt.rcParams.update({
    'font.sans-serif': ['STXihei'],
    'font.size': 12,
    'axes.unicode_minus': False,
})

# One 16x8 inch figure at 80 dpi holding a single white-backed axes.
fig, ax = plt.subplots(figsize=(16, 8), dpi=80)
ax.patch.set_color('white')

# Hide the top/right spines and draw the left/bottom ones in dark grey.
for side in ('right', 'top'):
    ax.spines[side].set_color('none')
for side in ('left', 'bottom'):
    ax.spines[side].set_color((64/255, 64/255, 64/255))
# Suppress tick marks on both axes.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_ticks_position('none')

# 30-bin histogram of the logged values in orange.
ax.hist(df['collection'], bins=30, alpha=0.7, color=(255/255, 153/255, 0/255))
ax.set_title('華語歌單播放數量分布情況', fontsize=20)
# Display the figure.
plt.show()

 

 

 

 

 

 

import pandas as pd
import matplotlib.pyplot as plt

# Read the song-name list: one unnamed column, labelled 'title' here.
df = pd.read_csv('music_message_3.csv', header=None, names=['title'], encoding='utf-8-sig')

# Count how often each title occurs, then keep the ten most frequent.
title_counts = df.groupby(['title'])['title'].agg(['count'])
title_counts.reset_index(inplace=True)
top_ten = title_counts.sort_index().sort_values('count', ascending=False)[0:10]

# Reverse both sequences so the ranking runs from smallest to largest.
names = list(top_ten.title)[::-1]
nums = list(top_ten['count'])[::-1]
data = pd.Series(nums, index=names)

# Global font and minus-sign settings for the figure.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'font.size': 10,
    'axes.unicode_minus': False,
})

# One 16x8 inch figure at 80 dpi holding a single white-backed axes.
fig, ax = plt.subplots(figsize=(16, 8), dpi=80)
ax.patch.set_color('white')

# Hide the top/right spines and draw the left/bottom ones in dark grey.
for side in ('right', 'top'):
    ax.spines[side].set_color('none')
for side in ('left', 'bottom'):
    ax.spines[side].set_color((64/255, 64/255, 64/255))
# Suppress tick marks on both axes.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_ticks_position('none')

# Horizontal bar chart of the counts in teal.
data.plot.barh(ax=ax, width=0.7, alpha=0.7, color=(16/255, 152/255, 168/255))
# Chart title.
ax.set_title('網易雲音樂華語歌單歌曲 TOP10', fontsize=18, fontweight='light')
# Print each count just past the end of its bar.
for row, count in enumerate(data.values):
    plt.text(count + 3.5, row - 0.12, '%s' % count, ha='center')
# Display the figure.
plt.show()

 

 

 

 

 

 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the playlist detail table; it has no header row, so columns are
# addressed by position.
df = pd.read_csv('music_message_4.csv', header=None)
# Take the natural log of the collection count in column 3.
# NOTE(review): counts are presumably abbreviated with a ten-thousand unit
# suffix ('万', or '萬' in Traditional Chinese) — confirm against the data.
# The suffix must be named explicitly: str.replace('', '0000') would insert
# the padding between every character and inflate every value.
dom = []
for i in df[3]:
    # str() keeps this working when pandas parsed the column as numbers.
    count_text = str(i).replace('万', '0000').replace('萬', '0000')
    dom.append(np.log(int(count_text)))
df['collection'] = dom
# Global font and minus-sign settings for the figure.
plt.rcParams['font.sans-serif'] = ['STXihei']
plt.rcParams['font.size'] = 12
plt.rcParams['axes.unicode_minus'] = False
# One 16x8 inch figure at 80 dpi holding a single white-backed axes.
fig = plt.figure(figsize=(16, 8), dpi=80)
ax = plt.subplot(1, 1, 1)
ax.patch.set_color('white')
# Current axes, used below for spine and tick styling.
lines = plt.gca()
# Hide the top/right spines and draw the left/bottom ones in dark grey.
lines.spines['right'].set_color('none')
lines.spines['top'].set_color('none')
lines.spines['left'].set_color((64/255, 64/255, 64/255))
lines.spines['bottom'].set_color((64/255, 64/255, 64/255))
# Suppress tick marks on both axes.
lines.xaxis.set_ticks_position('none')
lines.yaxis.set_ticks_position('none')
# 30-bin histogram of the logged values in dark blue.
ax.hist(df['collection'], bins=30, alpha=0.7, color=(21/255, 47/255, 71/255))
ax.set_title('華語歌單收藏數量分布情況', fontsize=20)
# Display the figure.
plt.show()

 

 

 

 

 

 

import squarify
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter

# Read the playlist detail table; it has no header row, so columns are
# addressed by position.
df = pd.read_csv('music_message_4.csv', header=None)
# Count every tag in column 1, where each cell holds '-'-joined tags.
# Counter keeps keys in first-seen order and needs a single pass over the
# column. Empty cells (read back as NaN) are skipped because they have no
# tags to split.
tag_counts = Counter()
for cell in df[1].dropna():
    tag_counts.update(cell.split('-'))
tags = list(tag_counts)
dom2 = list(tag_counts.values())
# Build a frame of (tag, count) and keep the ten most frequent tags.
data = {'tags': tags, 'num': dom2}
frame = pd.DataFrame(data)
df1 = frame.sort_values(by='num', ascending=False)
name = df1['tags'][:10]
income = df1['num'][:10]
# Draw the treemap: one rectangle per tag, sized and annotated by its count.
colors = ['#993333', '#CC9966',  '#333333', '#663366', '#003366', '#009966', '#FF6600', '#FF0033', '#009999', '#333366']
plot = squarify.plot(sizes=income, label=name, color=colors, alpha=1, value=income, edgecolor='white', linewidth=1.5)
# Global font and minus-sign settings.
# NOTE(review): these are applied after the treemap has been drawn — confirm
# they affect the labels that already exist.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['font.size'] = 8
plt.rcParams['axes.unicode_minus'] = False
# Set the default font size to 6; this overrides the value of 8 set above.
plt.rc('font', size=6)
# Chart title.
plot.set_title('網易雲音樂華語歌單標簽圖', fontsize=13, fontweight='light')
# Hide the axes.
plt.axis('off')
# Turn off ticks on the top and right edges.
plt.tick_params(top=False, right=False)
# Display the figure.
plt.show()

 

 

 五、總結

網易雲音樂的使用還是非常火爆的,以上是對網易雲爬蟲的一次愉快的探索之旅~


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM