Scraping NetEase Cloud Music playlist songs with Python


The URL of the first page of playlists on NetEase Cloud Music: http://music.163.com/#/discover/playlist/

The second page: http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=35

The third page: http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=70
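So each page simply adds 35 to the offset parameter (35 playlists per page). Note that the "#" shown in the browser address is only a client-side fragment; the crawler requests the same path without it, which is what the full script below does. A minimal sketch of building these page URLs (the script crawls the first four pages):

# Build playlist-page URLs; offset grows by 35 per page
BASE = 'http://music.163.com/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset='
for page in range(4):
    if page == 0:
        print 'http://music.163.com/discover/playlist'
    else:
        print BASE + str(page * 35)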

Then view the source of the playlist list's frame (the playlist grid is loaded inside an iframe):

From that source you can get each playlist's URL, e.g. http://music.163.com/#/playlist?id=696806036

Then, from the corresponding playlist page, you can get the name and the artist of every song in the playlist:
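On that playlist page (again requested without the "#"), the track titles sit in a hidden <ul class="f-hide"> list and a hidden <textarea> carries the same track list as a JSON array that includes the artists; the full script below relies on both. A rough sketch of pulling titles and artists from one playlist page (the playlist id is just the example from above):

import json
import requests
from bs4 import BeautifulSoup

# Fetch one playlist page (no '#' in the URL); the id is only an example
page = requests.get('http://music.163.com/playlist?id=696806036',
                    headers={'User-Agent': 'wswp'}).text
soup = BeautifulSoup(page, 'html.parser')
# Song titles live in a hidden <ul class="f-hide"> as <li><a>title</a></li>
names = [a.get_text() for a in soup.find('ul', class_='f-hide').find_all('a')]
# A hidden <textarea> holds the tracks as a JSON array with artist info
tracks = json.loads(soup.find('textarea', attrs={'style': 'display:none;'}).get_text())
artists = [t['artists'][0]['name'] for t in tracks]
print zip(names, artists)[:5]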

Then, with the song name and artist, use the Baidu Music search API to get the songid; the API is url = "http://sug.music.baidu.com/info/suggestion"

(Screenshot source: Tieba)
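A quick sketch of that lookup: send "song name + artist" as the word parameter and read data.song[0].songid from the JSON response (the query string here is only a placeholder):

import json
import requests

word = u'song title + artist'  # placeholder: concatenate the scraped title and singer
r = requests.get('http://sug.music.baidu.com/info/suggestion',
                 params={'word': word, 'version': '2.1.1', 'from': '0'})
d = json.loads(r.text)
data = d.get('data')
if isinstance(data, dict) and data.get('song'):
    print data['song'][0]['songid']  # this songid is used in the next step
else:
    print 'no match'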

Then, with the songid obtained, call the free Baidu Music API http://music.baidu.com/data/music/fmlink to get the songLink, download it, and save the file locally as .flac.

e.g. http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=flac
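A minimal sketch of that second call, reusing a songid from the search step (242078437 is just the id from the example URL above); it reads data.songList[0].songLink from the JSON and writes the audio to a .flac file:

import json
import requests

# fmlink API: exchange a songid for a downloadable songLink
r = requests.get('http://music.baidu.com/data/music/fmlink',
                 params={'songIds': '242078437', 'rate': '320', 'type': 'flac'})
d = json.loads(r.text)
data = d.get('data')
if isinstance(data, dict) and data.get('songList'):
    link = data['songList'][0]['songLink']
    if len(link) > 10:  # very short links mean no source, as in the script below
        audio = requests.get(link).content
        with open('sample.flac', 'wb') as f:  # output name is arbitrary for this sketch
            f.write(audio)
        print 'saved', link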


Download result:

Code:

# -*- coding: utf-8 -*-
import re
import os
import json
import urllib2
import requests
from bs4 import BeautifulSoup

import sys
reload(sys)
sys.setdefaultencoding('utf8')

URL = 'http://music.163.com'
NUM = 5
def download(url, user_agent='wswp', num_try=2):
    # Fetch a page, retrying a couple of times on 5xx server errors
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    try:
        html = urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        print 'Download error', e.reason
        html = None
        if num_try > 0:
            if hasattr(e, 'code') and 500 <= e.code < 600:
                return download(url, user_agent, num_try - 1)
    return html


def get_song_list(url):
    # Song titles live in the hidden <ul class="f-hide"> block of the playlist page
    html = download(url)
    if html is None:
        return []
    res = r'<ul class="f-hide">(.*?)</ul>'
    mm = re.findall(res, html, re.S | re.M)
    if not mm:
        return []
    res = r'<li><a .*?>(.*?)</a></li>'
    song_list = re.findall(res, mm[0], re.S | re.M)
    return song_list

# Get the playlist links on a NetEase page, e.g. /playlist?id=706469943
def get_play_list(html):
    soup = BeautifulSoup(html, "html.parser")
    results = soup.find_all(name='a', attrs={'class': 'tit f-thide s-fc0'})
    play_list = []
    for each in results:
        play_list.append(each.get('href'))
    return play_list

def download_music(url, song_name):
    print "Downloading song_name:" + song_name
    path = "songs"
    if not os.path.isdir(path):
        os.mkdir(path)
    content = download(url)
    if content is None:
        print "failed to fetch " + url
        return
    f = open(path + '/' + song_name + '.flac', 'wb')
    f.write(content)
    f.close()

def download_song(song_name, singer):
    # Baidu Music search: look up the songid for "song name + artist"
    url = "http://sug.music.baidu.com/info/suggestion"
    mess = song_name + singer
    payload = {'word': mess, 'version': '2.1.1', 'from': '0'}
    r = requests.get(url, params=payload)
    d = json.loads(r.text, encoding="utf-8")
    if 'data' not in d or 'song' not in d["data"] or not d["data"]["song"]:
        print "do not have flac"
        return 0
    song_id = d["data"]["song"][0]["songid"]
    print "song_id:" + str(song_id)

    # Free Baidu Music API: exchange the songid for a download link (songLink)
    url = "http://music.baidu.com/data/music/fmlink"
    '''
        http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=&callback=cb_download&_t=1468380564513&format=json
    '''
    payload = {'songIds': song_id, 'type': 'flac'}  # ask for the flac link, since we save a .flac file
    r = requests.get(url, params=payload)
    try:
        d = json.loads(r.text, encoding="utf-8")
    except ValueError:
        return 0
    if d is None or 'data' not in d or d['data'] == '':
        return 0
    songlink = d["data"]["songList"][0]["songLink"]
    if len(songlink) < 10:
        print "do not have flac"
        return 0
    print "Song Source: " + songlink
    download_music(songlink, mess)

def get_song_singer(url):
    # The playlist page hides its track list as JSON inside a <textarea>
    html = download(url)
    soup = BeautifulSoup(html, "html.parser")
    results = soup.find_all(name='textarea', attrs={'style': 'display:none;'})
    mess = str(results[0])
    head = len('<textarea style="display:none;">')
    tail = len('</textarea>')
    resu = mess[head:-tail]
    track_list = json.loads(resu, encoding="utf-8")
    singer_list = []
    for each in track_list:
        singer_list.append(each["artists"][0]["name"])
    return singer_list



if __name__ == '__main__':

    num = 0
    for flag in range(1, 5):
        # Pages after the first add offset = 35 * (page - 1)
        if flag > 1:
            page = (flag - 1) * 35
            url = 'http://music.163.com/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=' + str(page)
        else:
            url = 'http://music.163.com/discover/playlist'
        print url
        html = download(url)
        play_list = get_play_list(html)
        for i in play_list:
            song_list_url = URL + i
            print song_list_url
            song_names = get_song_list(song_list_url)      # track titles
            singer_names = get_song_singer(song_list_url)  # matching artists
            index = min(len(song_names), len(singer_names))
            num = num + len(singer_names)
            for j in range(0, index):
                print singer_names[j]
                print song_names[j]
                download_song(song_names[j], singer_names[j])
                print "\n"

    print "Download " + str(num) + " music\n"

 

 
       

