While teaching myself, I have been practicing on some simple real-world tasks; the program below fetches the data I need from a website.
Along the way I gradually picked up several techniques and found Python to be genuinely convenient.
Using pandas to extract tabular data from a web page, in particular, is remarkably easy!
The code may not be well written, but it basically meets my needs.
Pointers from more experienced developers are welcome.
Version 04 (Jan 12 2017) [recommended method for extracting table data]
# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

import pandas as pd

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site has 39 pages in total; this has to be checked on the site
    # manually for now, though it could also be scraped from the page
    # and improved later
    link = url2 + str(n)
    links.append(link)
links.insert(0, url2)

df2 = pd.DataFrame()  # creates a new dataframe that's empty
for url in links:
    # fetch the data with pandas; the html5lib module must be installed
    dfs = pd.read_html(url, header=0)
    for df in dfs:
        df2 = df2.append(df, ignore_index=True)

# df2.to_excel('MktDataBJ.xlsx')  # save the data to an Excel file
df2.to_csv('MktDataBJ-1.csv')  # save the data to a CSV file
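A side note: DataFrame.append was later deprecated and removed in pandas 2.0, so the loop above fails on current pandas. A minimal sketch of the same logic that collects the frames first and concatenates once with pd.concat:

import pandas as pd

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = [url2] + [url2 + str(n) for n in range(2, 40)]

frames = []
for url in links:
    # read_html returns one DataFrame per <table> found on the page
    frames.extend(pd.read_html(url, header=0))

# concatenate once at the end; cheaper than growing a DataFrame row by row
df2 = pd.concat(frames, ignore_index=True)
df2.to_csv('MktDataBJ-1.csv')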
Version 03 (Jan 12 2017)
# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

from bs4 import BeautifulSoup
import requests
import csv

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site has 39 pages in total; this has to be checked on the site
    # manually for now, though it could also be scraped from the page
    # and improved later
    link = url2 + str(n)
    links.append(link)
links.insert(0, url2)

for url in links:
    rep = requests.get(url)
    # content = rep.text.encode(rep.encoding).decode('utf-8')
    # # when using requests directly, Chinese text needs re-encoding

    soup = BeautifulSoup(rep.content, 'html.parser')

    # table = soup.table
    table = soup.find('table')  # either way works

    trs = table.find_all('tr')
    trs2 = trs[1:len(trs)]
    list1 = []
    for tr in trs2:
        td = tr.find_all('td')
        row = [i.text for i in td]
        list1.append(row)

    with open('MktDataBJ.csv', 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(list1)
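One limitation of this version: the file is opened in append mode inside the loop, so the CSV gets no header row and re-running the script duplicates every row. A sketch that opens the file once in write mode and takes the column titles from the first page's <th> cells (assuming, as Version 01 below suggests, that the table headers are <th> elements):

from bs4 import BeautifulSoup
import requests
import csv

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = [url2] + [url2 + str(n) for n in range(2, 40)]

with open('MktDataBJ.csv', 'w', errors='ignore', newline='') as f:
    f_csv = csv.writer(f)
    for i, url in enumerate(links):
        soup = BeautifulSoup(requests.get(url).content, 'html.parser')
        table = soup.find('table')
        if i == 0:
            # write the header row once, from the first page's <th> cells
            f_csv.writerow([th.text for th in table.find_all('th')])
        # skip the header <tr>, keep only the data rows
        for tr in table.find_all('tr')[1:]:
            f_csv.writerow([td.text for td in tr.find_all('td')])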
Version 02 (Jan 09 2017)
# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

from bs4 import BeautifulSoup
import requests
import csv

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site has 39 pages in total; this has to be checked on the site
    # manually for now, though it could also be scraped from the page
    # and improved later
    link = url2 + str(n)
    links.append(link)
links.insert(0, url2)
# print(links)

for url in links:
    rep = requests.get(url)
    # content = rep.text.encode(rep.encoding).decode('utf-8')
    # # when using requests directly, Chinese text needs re-encoding

    soup = BeautifulSoup(rep.content, 'html.parser')
    body = soup.body
    data = body.find('div', {'class': 'list_right'})

    quotes = data.find_all('tr')
    quotes1 = quotes[1:len(quotes)]

    list1 = []
    for x in quotes1:
        list2 = []
        for y in x.find_all('td'):
            list2.append(y.text)  # each day's data goes into its own list
        list1.append(list2)
    # print(list1)  # list1 is the master list of daily rows
    with open('MktDataBJ.csv', 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(list1)
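The total page count (39) is hard-coded in every version; the comments note it could be scraped from the page instead. One possible sketch, assuming hypothetically that the pagination links sit in a <div class="page"> element; the real class name and structure would need to be checked against the site's HTML:

from bs4 import BeautifulSoup
import requests

url2 = 'http://www.bjets.com.cn/article/jyxx/?'
soup = BeautifulSoup(requests.get(url2).content, 'html.parser')

# 'page' is a hypothetical class name; inspect the site to find the real one
pager = soup.find('div', {'class': 'page'})
page_numbers = [int(a.text) for a in pager.find_all('a')
                if a.text.strip().isdigit()]
total_pages = max(page_numbers) if page_numbers else 1

links = [url2] + [url2 + str(n) for n in range(2, total_pages + 1)]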
Version 01 (Jan 08 2017)
# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

from bs4 import BeautifulSoup
import requests
import csv

urllink = 'http://www.bjets.com.cn/article/jyxx/?'
links = []
for n in range(2, 40):
    # The site has 39 pages in total; this has to be checked on the site
    # manually for now, though it could also be scraped from the page
    # and improved later
    link = urllink + str(n)
    links.append(link)
links.insert(0, urllink)
# print(links)

for url in links:
    rep = requests.get(url)
    # content = rep.text.encode(rep.encoding).decode('utf-8')
    # # when using requests directly, Chinese text needs re-encoding

    soup = BeautifulSoup(rep.content, 'html.parser')

    # print(soup.prettify())
    # # prettify()

    body = soup.body
    data = body.find('div', {'class': 'list_right'})

    # table title
    titles = data.find_all('th')

    title = []
    for x in titles:
        title.append(x.text)
    # print(title)

    quotes = data.find_all('tr')
    quotes1 = quotes[1:len(quotes)]
    # print(quotes1)

    list1 = []
    for x in quotes1:
        for y in x.find_all('td'):
            list1.append(y.text)
    # print(list1)  # list1 is the master list of daily data cells

    date = []
    volumes = []
    meanprice = []
    totalmoney = []

    for i in range(0, len(list1)):
        if i % 4 == 0:
            date.append(list1[i])
        elif i % 4 == 1:
            volumes.append(list1[i])
        elif i % 4 == 2:
            meanprice.append(list1[i])
        else:
            totalmoney.append(list1[i])

    # print(date)
    # print(volumes)
    # print(meanprice)
    # print(totalmoney)

    final = []
    for i in range(0, len(date)):
        temp = [date[i], volumes[i], meanprice[i], totalmoney[i]]
        final.append(temp)
    # print(final)
    with open('bj_carbon.csv', 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerows(final)
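Version 01 splits the flat list1 into four column lists with i % 4 and then zips them back into rows. The same regrouping can be done in one step by slicing the flat list into chunks of four; a small sketch with made-up example values:

# flat cell list, four cells per day (values here are invented for illustration)
list1 = ['2017-01-06', '1000', '38.50', '38500',
         '2017-01-05', '2000', '38.00', '76000']

# slice into rows of four: [date, volume, mean price, total money]
final = [list1[i:i + 4] for i in range(0, len(list1), 4)]
# final == [['2017-01-06', '1000', '38.50', '38500'],
#           ['2017-01-05', '2000', '38.00', '76000']]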