# 1. 常見表格的讀操作
#coding=utf-8
import xlrd
import os
import re
import sys
import time
import datetime
import requests
import chardet
import traceback
import csv
import warnings
# Suppress every warning for the rest of the process.
warnings.filterwarnings("ignore")
# Python 2 only: sys.setdefaultencoding is removed from the sys module at
# interpreter start-up, so the module is reloaded to get it back. The call
# order matters - reload first, then change the default codec used for
# implicit str <-> unicode conversions to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
def print_xls(path):
    """Print the first-column value of every row in an Excel workbook.

    Opens the workbook at ``path`` with xlrd, takes its first sheet, prints
    that sheet's row count, and then prints the first cell of each row on
    its own line.

    Args:
        path: Location of the Excel file, passed to ``xlrd.open_workbook``.

    Returns:
        None. All output goes to stdout.
    """
    workbook = xlrd.open_workbook(path)
    sheet = workbook.sheets()[0]  # only the first sheet is inspected
    row_count = sheet.nrows
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(row_count)
    for row_number in range(row_count):
        # row_values() returns the cells of one row as a list, one item
        # per column; only the first column is reported.
        cells = sheet.row_values(row_number)
        print(cells[0])
def read_csv(path):
    """Print a condensed form of every record in a CSV file.

    For each record, the first field and every field from the fourth one
    onward are concatenated without a separator and printed on one line.
    The second and third fields are skipped. A record with fewer fields
    simply contributes fewer pieces.

    Args:
        path: Location of the CSV file.

    Returns:
        None. All output goes to stdout.
    """
    # The context manager closes the file even if parsing or printing fails.
    with open(path) as handle:
        for record in csv.reader(handle):
            wanted = record[0:1] + record[3:]
            # Python 2's csv module yields byte strings, which are decoded
            # as UTF-8; cells that are already text are used unchanged.
            pieces = [
                cell.decode('utf-8') if isinstance(cell, bytes) else cell
                for cell in wanted
            ]
            print(''.join(pieces))
if __name__ == '__main__':
    # Imported here because this script's import section never pulls in
    # pymongo, so the bare name MongoClient would raise NameError.
    from pymongo import MongoClient

    # NOTE(review): neither read_csv nor print_xls touches db_client, so
    # this connection looks unnecessary - confirm before removing it.
    db_client = MongoClient('192.168.86.136', 27017)
    try:
        read_csv(u'黃山市學校數據.csv')
        print_xls(u'黃山市學校數據.xls')
    finally:
        # Release the connection even when one of the readers fails.
        db_client.close()
# 2. xls表格寫操作
#coding=utf-8
import os
import re
import time
import requests
import json
from pymongo import MongoClient
import traceback
import urlparse
import urllib
import urllib2
import hashlib
import chardet
import random
import xlwt
import sys
# Python 2 only: sys.setdefaultencoding is removed from the sys module at
# interpreter start-up, so the module is reloaded to get it back before the
# default codec for implicit str <-> unicode conversions is set to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")
#導出數據
def export_mongo_data():
    """Export not-yet-flagged MongoDB documents to ``name.xls`` in batches.

    Repeatedly queries, through the module-level ``db_client``, for up to
    2000 documents that have no ``xxx`` field. Each batch gets its own
    sheet (``name``, ``name2``, ``name3``, ...) with the columns
    id / url / name. A document whose row is written is flagged
    ``xxx: True``; one whose processing raises is flagged ``xxx: False``;
    one with an empty ``url`` is flagged ``zzz: False``. The loop ends when
    the query matches nothing.

    Returns:
        None. The workbook is saved as ``name.xls`` in the working
        directory when at least one sheet was created.
    """
    # NOTE(review): the query reads db_client.xxxxxx.xxx, the success update
    # targets db_client.xxx.xxx and the other updates target
    # db_client.crawler_zuowen.gaosanW_byzhinengyuejuan. The names look like
    # partly anonymised placeholders. If they really are different
    # collections, or if 'zzz' is not the field the query filters on, the
    # flagged documents keep matching the query and this loop never ends -
    # confirm against the real schema.
    workbook = xlwt.Workbook(encoding='utf-8')
    base_sheet_name = u'name'
    batch_no = 1
    while True:
        pending = db_client.xxxxxx.xxx.find({'xxx': {'$exists': False}}).limit(2000)
        if not pending.count():
            break

        if batch_no > 1:
            sheet_title = base_sheet_name + str(batch_no)
        else:
            sheet_title = base_sheet_name
        # A failed document does not advance row_index, so the next one is
        # written to the same row. Without cell_overwrite_ok, xlwt raises on
        # any cell the failed attempt had already filled, which would make
        # every remaining document of the batch fail as well.
        sheet = workbook.add_sheet(sheet_title, cell_overwrite_ok=True)
        sheet.write(0, 0, u'id')
        sheet.write(0, 1, u'url')
        sheet.write(0, 2, u'name')

        row_index = 1
        for doc in pending:
            url = doc['url']
            if not url:
                db_client.crawler_zuowen.gaosanW_byzhinengyuejuan.update({'_id': doc['_id']}, {'$set': {'zzz': False}})
                continue
            try:
                paper_name = doc['zzz'] + doc['_id'] + ".html"
                sheet.write(row_index, 0, doc['_id'])
                sheet.write(row_index, 1, url)
                sheet.write(row_index, 2, paper_name)
                db_client.xxx.xxx.update({'_id': doc['_id']}, {'$set': {'xxx': True}})
                print(doc['_id'])
            except Exception:
                # Deliberately best-effort: report the failure, flag the
                # document and carry on with the rest of the batch.
                traceback.print_exc()
                db_client.crawler_zuowen.gaosanW_byzhinengyuejuan.update({'_id': doc['_id']}, {'$set': {'xxx': False}})
                continue
            row_index += 1
        batch_no += 1

    # batch_no only moves past 1 after a sheet has been added; xlwt cannot
    # save a workbook that contains no sheets.
    if batch_no > 1:
        workbook.save(u'name.xls')
if __name__ == '__main__':
    # export_mongo_data() looks this client up as a module-level name, so
    # the variable name must stay db_client.
    db_client = MongoClient('xxx.xxx.xx.xxxx', 27017)
    try:
        export_mongo_data()
    finally:
        # Close the connection even when the export raises part-way.
        db_client.close()