前幾日,爬蟲基本能爬點東西出來了,現在需要實現定時把數據爬到DB裡去,可以使用Windows定時任務執行py腳本,但好像不徹底,要做一個純(jiao)粹(qing)的程序員,定時任務的重任落到了APScheduler上;下文中許多內容來自互聯網。
APScheduler是基於Quartz的一個Python定時任務框架,實現了Quartz的所有功能,使用起來十分方便。提供了基於日期、固定時間間隔以及crontab類型的任務,並且可以持久化任務。基於這些功能,我們可以很方便的實現一個python定時任務系統,寫python還是要比java舒服多了。
使用 easy_install apscheduler安裝;
APScheduler提供了jobstore用於存儲job的執行信息,默認使用的是RAMJobStore,還提供了SQLAlchemyJobStore、ShelveJobStore和MongoDBJobStore。APScheduler允許同時使用多個jobstore,通過別名(alias)區分,在添加job時需要指定具體的jobstore的別名,否則使用的是別名為default的jobstore,即RAMJobStore。
使用MongoDBJobStore時,應注意pymongo(MongoDB的Python驅動)的版本,否則會報錯:新版的pymongo包沒有Connection類(下文示例用到的pymongo.Connection);改用2.0版本後正常: pip uninstall pymongo; pip install pymongo==2.0
下面以MongoDBJobStore舉例說明。
import time

import pymongo
from apscheduler.scheduler import Scheduler
from apscheduler.jobstores.mongodb_store import MongoDBJobStore

# daemonic=False: the scheduler thread is created as a non-daemon thread.
sched = Scheduler(daemonic=False)

# Connect to the local MongoDB server and register a job store on top of
# that connection under the alias 'mongo'.
mongo = pymongo.Connection(host='127.0.0.1', port=27017)
store = MongoDBJobStore(connection=mongo)
sched.add_jobstore(store, 'mongo')


# Cron-style trigger: every second, on days of week 0-4, during hours
# 9-12 and 13-15. The job is added to the job store aliased 'mongo'.
@sched.cron_schedule(second='*', day_of_week='0-4', hour='9-12,13-15',
                     jobstore='mongo')
def job():
    """Print a marker line, then sleep for one second."""
    print('a job')
    time.sleep(1)


# Start the scheduler only after the job store and the job are registered.
sched.start()
注意start必須在添加job動作之后調用,否則會拋錯。默認會把job信息保存在apscheduler數據庫下的jobs表:
> db.jobs.findOne()
{
" _id " : ObjectId( " 502202d1443c1557fa8b8d66 "),
" runs " : 20,
" name " : " job ",
" misfire_grace_time " : 1,
" coalesce " : true,
" args " : BinData(0, " gAJdcQEu "),
" next_run_time " : ISODate( " 2012-08-08T14:10:46Z "),
" max_instances " : 1,
" max_runs " : null,
" trigger " : BinData(0, " gAJjYXBzY2hlZHVsZXIudHJpZ2dlcnMuY3JvbgpDcm9uVHJpZ2dlcgpxASmBcQJ9cQMoVQZmaWVsZHNxBF1xBShjYXBzY2hlZHVsZXIudHJpZ2dlcnMuY3Jvbi5maWVsZHMKQmFzZUZpZWxkCnEGKYFxB31xCChVCmlzX2RlZmF1bHRxCYhVC2V4cHJlc3Npb25zcQpdcQtjYXBzY2hlZHVsZXIudHJpZ2dlcnMuY3Jvbi5leHByZXNzaW9ucwpBbGxFeHByZXNzaW9uCnEMKYFxDX1xDlUEc3RlcHEPTnNiYVUEbmFtZXEQVQR5ZWFycRF1YmgGKYFxEn1xEyhoCYhoCl1xFGgMKYFxFX1xFmgPTnNiYWgQVQVtb250aHEXdWJjYXBzY2hlZHVsZXIudHJpZ2dlcnMuY3Jvbi5maWVsZHMKRGF5T2ZNb250aEZpZWxkCnEYKYFxGX1xGihoCYhoCl1xG2gMKYFxHH1xHWgPTnNiYWgQVQNkYXlxHnViY2Fwc2NoZWR1bGVyLnRyaWdnZXJzLmNyb24uZmllbGRzCldlZWtGaWVsZApxHymBcSB9cSEoaAmIaApdcSJoDCmBcSN9cSRoD05zYmFoEFUEd2Vla3EldWJjYXBzY2hlZHVsZXIudHJpZ2dlcnMuY3Jvbi5maWVsZHMKRGF5T2ZXZWVrRmllbGQKcSYpgXEnfXEoKGgJiWgKXXEpY2Fwc2NoZWR1bGVyLnRyaWdnZXJzLmNyb24uZXhwcmVzc2lvbnMKUmFuZ2VFeHByZXNzaW9uCnEqKYFxK31xLChoD05VBGxhc3RxLUsEVQVmaXJzdHEuSwB1YmFoEFULZGF5X29mX3dlZWtxL3ViaAYpgXEwfXExKGgJiWgKXXEyKGgqKYFxM31xNChoD05oLUsMaC5LCXViaCopgXE1fXE2KGgPTmgtSw9oLksNdWJlaBBVBGhvdXJxN3ViaAYpgXE4fXE5KGgJiGgKXXE6aAwpgXE7fXE8aA9Oc2JhaBBVBm1pbnV0ZXE9dWJoBimBcT59cT8oaAmJaApdcUBoDCmBcUF9cUJoD05zYmFoEFUGc2Vjb25kcUN1YmVVCnN0YXJ0X2RhdGVxRE51Yi4= "),
" func_ref " : " __main__:job ",
" kwargs " : BinData(0, " gAJ9cQEu ")
}