1.常見表格的讀操作
#coding=utf-8
# Read-only examples for two spreadsheet formats: .xls (via xlrd) and .csv
# (via the csv module). Written for Python 2.
import csv
import datetime
import os
import re
import sys
import time
import traceback
import warnings

import chardet
import requests
import xlrd

warnings.filterwarnings("ignore")

# Python 2 idiom: reload(sys) re-exposes sys.setdefaultencoding so that
# implicit str <-> unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')


def print_xls(path):
    """Print the row count of the first sheet, then the first cell of each row.

    Args:
        path: Location of the workbook; handed to ``xlrd.open_workbook``.

    Raises:
        IndexError: If the workbook has no sheets or a row has no cells.
    """
    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheets()[0]  # only the first sheet is read
    row_count = sheet.nrows       # number of rows that hold data
    print(row_count)
    for row_index in range(row_count):
        # row_values returns one row as a list with one item per column.
        cells = sheet.row_values(row_index)
        print(cells[0])


def read_csv(path):
    """Print one line per CSV record: field 1 followed by fields 4 onward.

    The second and third fields (indices 1 and 2) are skipped; the remaining
    fields are decoded as UTF-8 and concatenated without a separator.

    Args:
        path: Location of the CSV file.

    Raises:
        UnicodeDecodeError: If a kept field is not valid UTF-8.
    """
    # Python 2's csv module expects the file to be opened in binary mode;
    # the with-block guarantees the handle is closed even on error.
    with open(path, 'rb') as csv_file:
        for record in csv.reader(csv_file):
            kept_fields = record[0:1] + record[3:]
            print(''.join(field.decode('utf-8') for field in kept_fields))


if __name__ == '__main__':
    read_csv(u'黃山市學校數據.csv')
    print_xls(u'黃山市學校數據.xls')
2.xls表格寫操作
#coding=utf-8
# Export MongoDB documents to an .xls workbook with xlwt, one sheet per batch
# of up to 2000 documents. Written for Python 2.
import hashlib
import json
import os
import random
import re
import sys
import time
import traceback
import urllib
import urllib2
import urlparse

import chardet
import requests
import xlwt
from pymongo import MongoClient

# Python 2 idiom: reload(sys) re-exposes sys.setdefaultencoding so that
# implicit str <-> unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding("utf-8")


def export_mongo_data():
    """Export not-yet-flagged documents to ``name.xls`` and flag them.

    Repeatedly queries documents that lack the ``xxx`` field (2000 per
    batch). Each batch gets its own sheet (``name``, ``name2``, ``name3``,
    ...) with the columns id / url / name, where name is
    ``zzz + _id + ".html"``. A document that is written successfully is
    flagged ``xxx: True``; one that raises while being processed is flagged
    ``xxx: False``; one with an empty ``url`` gets ``zzz: False``.

    Relies on the module-level ``db_client`` created in the ``__main__``
    block. Nothing is saved when the query matches no documents.

    NOTE(review): the database/collection names are placeholders and differ
    between the query and the three updates. The loop only terminates if the
    updates set ``xxx`` on the collection being queried; in particular the
    empty-url branch sets ``zzz`` rather than ``xxx`` -- confirm against the
    real schema.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    base_sheet_name = u'name'
    batch_number = 1
    while True:
        cursor = db_client.xxxxxx.xxx.find({'xxx': {'$exists': False}}).limit(2000)
        if not cursor.count():
            break

        if batch_number > 1:
            sheet_name = base_sheet_name + str(batch_number)
        else:
            sheet_name = base_sheet_name

        # cell_overwrite_ok: a document that fails after some of its cells
        # were written does not advance row_index, so the next document
        # reuses that row. Without this flag xlwt rejects the overwrite and
        # every following document in the batch would fail as well.
        sheet = workbook.add_sheet(sheet_name, cell_overwrite_ok=True)
        for column, title in enumerate((u'id', u'url', u'name')):
            sheet.write(0, column, title)

        row_index = 1
        for doc in cursor:
            url = doc['url']
            doc_id = doc['_id']
            if not url:
                db_client.crawler_zuowen.gaosanW_byzhinengyuejuan.update(
                    {'_id': doc_id}, {'$set': {'zzz': False}})
                continue
            try:
                paper_name = doc['zzz'] + doc_id + ".html"
                sheet.write(row_index, 0, doc_id)
                sheet.write(row_index, 1, url)
                sheet.write(row_index, 2, paper_name)
                db_client.xxx.xxx.update(
                    {'_id': doc_id}, {'$set': {'xxx': True}})
                print(doc_id)
            except Exception:
                # Best effort: report the failure, flag the document and
                # carry on with the next one.
                traceback.print_exc()
                db_client.crawler_zuowen.gaosanW_byzhinengyuejuan.update(
                    {'_id': doc_id}, {'$set': {'xxx': False}})
                continue
            row_index += 1
        batch_number += 1

    # batch_number only advances after a sheet was added; xlwt raises when
    # asked to save a workbook that contains no sheet.
    if batch_number > 1:
        workbook.save(u'name.xls')


if __name__ == '__main__':
    db_client = MongoClient('xxx.xxx.xx.xxxx', 27017)
    try:
        export_mongo_data()
    finally:
        # Release the connection even when the export fails part-way.
        db_client.close()