需要一个月的数据,每次只跑一周的数据;表建好之后,用插入数据(insert into)的方式逐周追加:
import datetime
import os
import sys
import time

# Python 2 only: force the interpreter-wide default string encoding to UTF-8.
# `reload` is a builtin only on Python 2, and `sys.setdefaultencoding` is
# removed from `sys` at startup (reload brings it back).  Neither exists on
# Python 3, so the hack is skipped there instead of raising NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821
    sys.setdefaultencoding("utf-8")

# Number of `ifconfig` lines containing "inet addr:172".  Non-zero is treated
# as "running on the server": paths switch to /app/home/... and Hive commands
# are actually executed instead of only being printed.
flag_server = int(os.popen('ifconfig | grep "inet addr:172" | wc -l').read().strip())

dir_scripts = '/app/home/zhangb/' if flag_server else '/Users/zhangb/Desktop/'
# NOTE(review): the server path here says "zhangbo" while dir_scripts says
# "zhangb" - confirm which spelling is intended.
dir_data = '/app/home/zhangbo/' if flag_server else '/Users/zhangb/Desktop/'
dir_server = '/app/home/'

# Make helper modules stored next to the scripts importable.
sys.path.append(dir_scripts)

# Hive database selected by the `use` statement of every generated command.
db_name = 'zhangb'
def hive_day_cid(create_date, type11_duration):
    """Append per-provider, per-day cid counts for one date window to Hive.

    Builds a ``hive -e`` shell command that

    * switches to database ``db_name``,
    * creates ``hive_day_cid_provider`` if it does not exist yet, and
    * appends one row per (provider, day) with the count of cids
      (``cnt_cid``) and the count of distinct cids (``dist_cid``), where a
      cid is a non-empty ``token_md5`` from
      ``report_ods_mdp.upload_bi_type11`` and the provider is one of
      gps / network / passive / none.

    The command is always printed.  It is executed only when the module-level
    ``flag_server`` is truthy.

    The statement is ``insert into`` (append), so running the same window
    twice stores its rows twice.

    Args:
        create_date: ``datetime.date`` of the last day of the window
            (inclusive).
        type11_duration: window length in days; the first day of the window
            is ``create_date - (type11_duration - 1)`` days.

    Returns:
        The value returned by ``os.system`` (0 on success) when the command
        was executed, or ``None`` when it was only printed.
    """
    print("# ---------------------------------------------------------------------------------- #")

    # Both window bounds are inclusive and formatted as YYYYMMDD, which is
    # what the `day` column is compared against.
    window_length = datetime.timedelta(days=type11_duration - 1)
    start_date_str = (create_date - window_length).strftime("%Y%m%d")
    end_date_str = create_date.strftime("%Y%m%d")

    hive_command = '''
    hive -e " use %s;
    create table if not exists hive_day_cid_provider(provider string,day int, cnt_cid bigint,dist_cid bigint );

    insert into hive_day_cid_provider
    select a.provider,a.day,count(a.cid) as cnt_cid,count(distinct(a.cid)) as dist_cid from
    (select day,provider,token_md5 as cid from report_ods_mdp.upload_bi_type11
    where day >=%s and day <= %s and length(token_md5)>0 and provider in ('gps','network','passive','none') ) a
    group by a.provider,a.day

    ;"
    ''' % (db_name, start_date_str, end_date_str)

    print(hive_command)

    status = None
    if flag_server:
        status = os.system(hive_command)
        # Surface a failed Hive run instead of silently moving on to the
        # next window.
        if status != 0:
            print("# hive command failed, os.system returned %s" % status)
    print("\n")
    return status
if __name__ == '__main__':
    start = time.time()
    business_name = 'brand48'  # not referenced anywhere in the visible script

    # Each (month, day) pair is the LAST day of a 7-day window in 2016, i.e.
    # the end date of that period.
    # NOTE(review): (11, 21) is absent, so 2016-11-15..2016-11-21 is never
    # loaded - confirm whether that week was skipped on purpose.
    # Earlier run used: [(2, 7), (2, 14), (2, 21), (2, 28)]
    window_end_days = [(11, 7), (11, 14), (11, 28), (12, 5), (12, 12), (12, 19), (12, 26)]

    # NOTE(review): the original paste lost its indentation; the extent of
    # the loop body below is a reconstruction - confirm against the real file.
    for (i, j) in window_end_days:
        create_date = datetime.date(2016, i, j)
        type11_duration = 7
        hive_day_cid(create_date, type11_duration)
        print("\r")
        # Two spaces after the colon reproduce the output of the Python 2
        # statement `print '# Time: ', value`.
        print('# Time:  ' + str(datetime.timedelta(seconds=(time.time() - start))))
        print('# the end')
        print('\n')

    # hive_imei_time_list(create_date, type11_duration)
    print('# Time:  ' + str(datetime.timedelta(seconds=(time.time() - start))))

    # Beintoo_day.hive_output(create_date, cnt_duration=7)

# ---------------------------------------------------------------------------
# Reference notes (never executed): ways to generate weekly dates.
#
# Weekly dates counted from a fixed day:
#     for i in range(1, 30):
#         a = datetime.date(2016, 2, 23)
#         b = a + datetime.timedelta(7 * i)
#         print b
#
# Windows that cross a year boundary, method 1:
#     date_begin = datetime.date(2016, 12, 1)
#     # date_end = date_begin
#     date_end = datetime.date(2017, 1, 10)
#     for i in range(0, (date_end - date_begin).days + 1, 7):
#         create_date = date_begin + datetime.timedelta(days=i)
#         print create_date
#
# Method 2:
#     date_begin = datetime.date(2016, 12, 1)
#     # date_end = date_begin
#     date_end = datetime.date(2017, 1, 10)
#     while date_begin <= date_end:
#         print date_begin
#         date_begin = date_begin + datetime.timedelta(days=7)
# ---------------------------------------------------------------------------