1.建立行政區划清單表
-- Administrative-division lookup table (province / city / county names with
-- their codes and an alias).  The table is dropped and recreated on every
-- run, so all column and table comments are declared inline in the
-- CREATE TABLE instead of being patched on afterwards with one
-- ALTER TABLE ... MODIFY COLUMN per column.
use edw;

drop table if exists dim_prov_city_adcode;

create table if not exists dim_prov_city_adcode (
    prov_nm     varchar(64) comment '省份、直轄市名稱',
    city_nm     varchar(64) comment '城市名稱',
    county_nm   varchar(64) comment '區域名稱',
    -- NOTE(review): the comment below says "city GB code" although the
    -- column is named county_code -- confirm which one is intended.
    prov_code   varchar(64) comment '省GB碼',
    county_code varchar(64) comment '市GB碼',
    adcode      varchar(64) comment '參考GB碼(adcode)',
    alias_nm    varchar(64) comment '別名'
) engine=InnoDB default charset=utf8 comment='行政區划清單表';

-- The primary key was disabled in the original script and is kept disabled.
-- MySQL only recognises "--" as a comment when it is followed by whitespace,
-- so the marker is written as "-- " here.
-- NOTE(review): enable only after confirming adcode is unique per row.
-- alter table dim_prov_city_adcode add primary key(adcode);
2.插入行政區划清單表數據
3.建立天氣數據表
-- Weather observations keyed by region code, date and hour.
-- The primary key is declared inline: the previous form ran an unconditional
-- "alter table ... add primary key" after "create table if not exists",
-- which fails on every re-run because the key already exists.
create table if not exists fact_weather_adcode_day (
    province      varchar(64) comment '省份名',
    city          varchar(64) comment '城市名',
    adcode        varchar(64) comment '區域編碼',
    weather       varchar(64) comment '天氣現象(漢字描述)',
    temperature   float       comment '實時氣溫',
    winddirection varchar(64) comment '風向',
    windpower     varchar(64) comment '風力',
    humidity      varchar(64) comment '空氣濕度',
    reporttime    varchar(64) comment '發布時間',
    date_id       varchar(64) comment '日期',
    hour_id       varchar(64) comment '小時',
    primary key (adcode, date_id, hour_id)
) engine=InnoDB default charset=utf8;
4.開發Python腳本調用API獲取數據
#coding:utf8 #------------------------------------------------------ #Filename: getWeather.py #Revision: 1.0 #Date: 2018/08/17 #Author: Jim #Description: #Notes: 通過調用高德API獲取天氣數據 #key: 6ca7b720f2ab2a48f749c1e19c3d1c47 #高德API相關文檔: https://lbs.amap.com/api/ #獲取天氣: https://restapi.amap.com/v3/weather/weatherInfo?city=110101&key=<用戶key> # 如:https://restapi.amap.com/v3/weather/weatherInfo?city=110101&key=6ca7b720f2ab2a48f749c1e19c3d1c47 #------------------------------------------------------ import time, re, os, sys, time,urllib2,shutil,string import json,datetime,MySQLdb from dateutil.parser import parse #設置utf-8編碼格式 reload(sys) sys.setdefaultencoding( "utf-8" ) #獲取當前日期的前n天 def getbeforeDay(n=0): now_time = datetime.datetime.now() beforeday = now_time - datetime.timedelta(n) return beforeday.strftime("%Y%m%d") scriptDir = os.getcwd() if len(sys.argv) > 1 : job_date_id = sys.argv[1] else : job_date_id = getbeforeDay(0) print "當前腳本路徑:%s,當前參數日期:%s" % (scriptDir,job_date_id) #保存數據到文件文件 def saveContext(filename,*name): format = '^' dev_prd_flag = 'prd' context = name[0] for i in name[1:]: context = context + format + str(i) context = str(context).replace('(','(').replace(')',')').replace(',',',').replace(':',':') if dev_prd_flag != 'prd': print context else: #去除文件路徑名首位空格 filename = filename.strip() #讀取目錄名稱 path = os.path.dirname(filename) #如果目錄不存在則創建目錄 if not os.path.exists(path): os.makedirs(path) #讀取文件名稱,以追加的方式寫文件 name = os.path.basename(filename) fp = open(filename,'a') fp.write(context+'\n') fp.close() #獲取網頁文件 def getHtml(url,code='utf-8'): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36 LBBROWSER' } request = urllib2.Request(url,headers=headers) response = urllib2.urlopen(request,data=None,timeout=60) html = unicode(response.read(),str(code)) return html class ConMysql(): def __init__(self,host='192.168.122.140',user='shutong',passwd='shutong',db='edw'): print "連接mysql數據庫" try: 
conn= MySQLdb.connect( host= host, port = 3306, user=user, passwd=passwd, db =db, charset='utf8', ) self.conn = conn print "連接mysql成功" except : print "連接mysql失敗" def execSQL(self,sql): print "執行語句:%s" % (sql) cur = self.conn.cursor() cur.execute('set character_set_client = utf8') cur.execute('set character_set_server = utf8') cur.execute('set character_set_connection = utf8') cur.execute('set character_set_results = utf8') cur.execute('set collation_connection = utf8_general_ci') cur.execute('set collation_server = utf8_general_ci') result = cur.fetchmany(cur.execute(sql)) cur.close() self.conn.commit() return result '''析構方法''' def __del__(self): if self.conn: self.conn.close() else: pass adcode = "110000" key = "6ca7b720f2ab2a48f749c1e19c3d1c47" def crawl_weather(adcode): url = "https://restapi.amap.com/v3/weather/weatherInfo?city=%s&key=%s" % (adcode,key) html = getHtml(url) jsondata = json.loads(html) #省份 province = jsondata['lives'][0]['province'] #市 city = jsondata['lives'][0]['province'] #區域編碼 adcode = jsondata['lives'][0]['adcode'] #天氣現象 weather = jsondata['lives'][0]['weather'] #實時氣溫 攝氏度 temperature = jsondata['lives'][0]['temperature'] #風向 winddirection = jsondata['lives'][0]['winddirection'] #風力 windpower = jsondata['lives'][0]['windpower'] #空氣濕度 humidity = jsondata['lives'][0]['humidity'] #數據發布時間 reporttime = jsondata['lives'][0]['reporttime'] #日期 date_id = parse(reporttime).strftime('%Y%m%d') #小時 hour_id = parse(reporttime).strftime('%H') print province,city,adcode,weather,temperature,winddirection,windpower,humidity,reporttime,date_id,hour_id sql = "insert ignore into fact_weather_adcode_day values ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s');" % (province,city,adcode,weather,temperature,winddirection,windpower,humidity,reporttime,date_id,hour_id) print sql conn.execSQL(sql) conn = ConMysql() crawl_weather(adcode)