用 urllib 讀取網頁,然後用 Py-excel(xlwt)將資料寫入 Excel。
import datetime
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

try:
    from xlwt import Workbook
except ImportError:
    # xlwt is a third-party package. Keep this module importable without it
    # (getdata has no dependency on it); FetchData reports the problem.
    Workbook = None


def FetchData():
    """Scrape 100 dated pages and store one extracted value per day in Excel.

    For each day from 2010-01-01 onward, the date is written to column 0 of
    rows 1..100 (row 0 is left empty), the page for that date is downloaded,
    and the first ``date_west`` value found on it is written to column 1.
    The workbook is saved as ``simple.xls`` in the current directory.

    If a page cannot be opened, the error is printed and fetching stops, but
    the rows collected so far are still saved. ``finish!`` is printed only
    when all pages were fetched.

    Raises:
        ImportError: if xlwt is not installed.
    """
    if Workbook is None:
        raise ImportError("xlwt is required to write the Excel file")

    # The encoding tells xlwt how to decode byte strings; needed when the
    # collected data contains Chinese text.
    book = Workbook(encoding='gbk')
    sheet1 = book.add_sheet('Sheet 2')  # in-memory sheet buffer

    theday = datetime.date(2009, 12, 31)
    completed = True
    # Scenario: collect 100 pages, every URL carries a date.
    for row in range(1, 101):
        theday = theday + datetime.timedelta(days=1)
        print(theday)
        theday_str = str(theday)
        sheet1.write(row, 0, theday_str)  # write the date cell
        check_url = r'http://www.xxx.com/index?date=' + theday_str  # page URL

        try:
            checkfile = urlopen(check_url)  # file-like object for the page
        except Exception as exc:
            print(exc)
            # Stop fetching, but fall through to save what was collected
            # instead of discarding it.
            completed = False
            break

        try:
            for line in checkfile:
                # The page is UTF-8. Keep decoded text rather than re-encoding
                # to a platform-dependent byte encoding; "replace" keeps one
                # malformed byte from aborting the whole run.
                line = line.decode("UTF-8", "replace")
                date_west = getdata('date_west', line)  # extract the value
                if date_west is not False:
                    sheet1.write(row, 1, date_west)
                    # xlwt refuses to write the same cell twice by default,
                    # so only the first match on a page can be stored.
                    break
        finally:
            checkfile.close()

    book.save('simple.xls')  # save the Excel file
    if completed:
        print('finish!')


def getdata(keywords, line):
    """Return the text between the first '>' and the following '</' in line.

    The extraction only happens when ``keywords`` occurs in ``line``.

    Args:
        keywords: substring that marks a line of interest.
        line: one line of page text.

    Returns:
        The extracted text (possibly empty). When the closing '</' is
        missing, the text runs to the end of the line without its line
        terminator. Returns False when ``keywords`` is not in ``line`` or
        the line contains no '>'.
    """
    if keywords not in line:
        return False

    # NOTE(review): the search starts at the beginning of the line, not at
    # the keyword, so a tag preceding the keyword's tag is picked up first.
    # Confirm against the real page layout before changing this.
    start = line.find('>')
    if start == -1:
        # No opening delimiter: nothing to extract (avoid slicing with -1).
        return False

    end = line.find('</', start)
    if end == -1:
        # No closing delimiter: take the rest of the line, minus the newline.
        return line[start + 1:].rstrip('\r\n')
    return line[start + 1:end]