python 爬取bilibili 視頻彈幕


 1 # -*- coding: utf-8 -*-
 2 # @author: Tele
 3 # @Time : 2019/04/09 下午 4:50
 4 # 爬取彈幕
 5 import requests  6 import json  7 import os  8 from lxml import etree  9 
10 
def main(aid=13197279, output_path="./comment.txt", timeout=10):
    """Download the danmaku (bullet comments) of one bilibili video to a text file.

    The video's ``cid`` is looked up through the web-interface view API, the
    comment XML published for that ``cid`` is fetched, and the text of every
    ``<d>`` element is written to ``output_path``, one comment per line.

    Args:
        aid: Numeric "av" id of the video (the default is av13197279).
        output_path: File that receives the comments. An existing file is
            overwritten.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. A short message is printed and the function returns early when
        a request does not answer with HTTP 200 or a response cannot be
        interpreted; in that case ``output_path`` is left untouched.

    Raises:
        requests.RequestException: If a request fails at the network level
            or exceeds ``timeout``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    }

    # Step 1: resolve the video id to the cid that identifies its comment pool.
    url = "https://api.bilibili.com/x/web-interface/view?aid={}".format(aid)
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print("view request failed, HTTP status:", response.status_code)
        return

    # An error payload may carry no usable "data" object, so probe defensively
    # instead of indexing straight into it.
    payload = response.json()
    data = payload.get("data") if isinstance(payload, dict) else None
    cid = data.get("cid") if isinstance(data, dict) else None
    if cid is None:
        print("no cid found in view response for aid:", aid)
        return
    print("cid:", cid)

    # Step 2: download the comment XML for that cid.
    cid_url = "https://comment.bilibili.com/{}.xml".format(cid)
    result = requests.get(cid_url, headers=headers, timeout=timeout)
    if result.status_code != 200:
        print("comment request failed, HTTP status:", result.status_code)
        return

    # Parse as XML (not HTML) so the encoding named in the XML declaration is
    # honoured; recover=True tolerates minor well-formedness problems.
    try:
        root = etree.fromstring(result.content, parser=etree.XMLParser(recover=True))
    except etree.XMLSyntaxError:
        root = None
    if root is None:
        print("comment XML could not be parsed")
        return

    # Empty <d/> elements have no text; skip them rather than crash.
    comments = [d.text for d in root.iter("d") if d.text]

    # Mode "w" truncates an existing file, so no separate removal is needed.
    with open(output_path, "w", encoding="utf-8") as file:
        file.writelines(comment + "\n" for comment in comments)
35 
# Script entry point: run the scraper only on direct execution, not on import.
if __name__ == "__main__":
    main()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM