爬取CBO中國票房網所有電影片名和演員名字
# -*- coding: utf-8 -*-
"""Scrape all movie titles and all actor names from CBO (cbooo.cn).

Two paginated JSON endpoints are walked page by page. For each page the
``pData`` list of the response is read and one field of every record is
appended, one value per line, to a text file:

* movie titles: field ``MovieName`` -> ``moviename.txt``
* actor names:  field ``cnName``    -> ``moviestar.txt``
"""
import json
import time

import requests

MOVIE_URL = ('http://www.cbooo.cn/Mdata/getMdata_movie'
             '?area=50&type=0&year=0&initial=%E5%85%A8%E9%83%A8&pIndex=')
ACTOR_URL = ('http://www.cbooo.cn/Mdata/getMdate_pList'
             '?area=50&type=0&year=0&initial=%E5%85%A8%E9%83%A8&pIndex=')

# Seconds to wait for a response; without a timeout a stalled connection
# would block the whole run indefinitely.
REQUEST_TIMEOUT = 10
# Pause before every request, in seconds.
PAGE_DELAY = 2


def _scrape_names(url_prefix, page_stop, field, out_path):
    """Append one field of every record on pages ``1 .. page_stop - 1``.

    Args:
        url_prefix: Endpoint URL ending in ``pIndex=``; the page number is
            appended to it.
        page_stop: Exclusive upper bound of the page numbers to fetch.
        field: Key to read from each record of the response's ``pData`` list.
        out_path: Text file the values are appended to, one per line.

    A page that cannot be fetched or parsed is reported on stdout and
    skipped; the remaining pages are still processed.
    """
    # The values are expected to contain Chinese text, so the encoding is
    # pinned instead of relying on the platform default.
    with open(out_path, 'a', encoding='utf-8') as fh:
        for pn in range(1, page_stop):
            url = url_prefix + str(pn)
            print(url)
            time.sleep(PAGE_DELAY)
            try:
                result = requests.get(url, timeout=REQUEST_TIMEOUT).text
                records = json.loads(result).get('pData')
            except (requests.RequestException, ValueError,
                    AttributeError) as exc:
                # Network error, invalid JSON, or a JSON value that is not
                # an object. Ctrl-C is deliberately NOT caught here.
                print('第' + str(pn) + '失敗!', exc)
                continue
            if not isinstance(records, list):
                print('第' + str(pn) + '失敗!')
                continue
            for record in records:
                name = record.get(field) if isinstance(record, dict) else None
                if not isinstance(name, str):
                    # Skip a malformed record rather than lose the page.
                    continue
                print(name)
                fh.write(name + "\n")


def main():
    """Run both scrapes: movie titles first, then actor names."""
    _scrape_names(MOVIE_URL, 320, 'MovieName', "moviename.txt")
    _scrape_names(ACTOR_URL, 2665, 'cnName', "moviestar.txt")


if __name__ == "__main__":
    main()
爬取電視劇名稱
# -*- coding: utf-8 -*-
"""Scrape TV drama titles from Qilu Movie (qilumovie.com).

Downloads one listing page, takes the text of the first link inside every
``<li>`` element of the document body, and appends each title, one per
line, to ``tvplay.txt``.
"""
# The submodule must be imported explicitly: a plain ``import urllib`` does
# not make ``urllib.request`` available.
import urllib.request

from bs4 import BeautifulSoup

LIST_URL = "http://www.qilumovie.com/filmclass-txt/9.html"
OUTPUT_PATH = "tvplay.txt"
# Seconds to wait for the server before giving up.
REQUEST_TIMEOUT = 30


def main():
    """Fetch the listing page and append every linked title to the file."""
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(LIST_URL, timeout=REQUEST_TIMEOUT) as response:
        html = response.read()

    # The page bytes are decoded as GBK; the original author flagged this
    # step as the one to pay attention to.
    htmldecode = html.decode("gbk")
    soup = BeautifulSoup(htmldecode, "lxml")
    maplist = soup.body.find_all("li")

    # Pin the output encoding instead of relying on the platform default.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as fh:
        for tvl in maplist:
            link = tvl.a
            if link is None:
                # An <li> without a link carries no title; skip it.
                continue
            tv = link.text
            print(tv)
            fh.write(tv + '\n')


if __name__ == "__main__":
    main()
如有錯誤,還請大俠指教一二!