用 Python 编写 Hive 循环执行脚本


 

需要一个月的数据,但每次只跑一周的数据;先建表(create table if not exists),再用插入数据(insert into)的方式逐周写入:

 

 1 import os, sys  2 reload(sys); sys.setdefaultencoding( "utf-8" )  3 flag_server = int(os.popen('ifconfig | grep "inet addr:172" | wc -l').read().strip())  4 dir_scripts = '/app/home/zhangb/' if flag_server else '/Users/zhangb/Desktop/'
 5 dir_data = '/app/home/zhangbo/' if flag_server else '/Users/zhangb/Desktop/'
 6 dir_server = '/app/home/'
 7 sys.path.append(dir_scripts)  8 
 9 import datetime 10 import time 11 db_name = 'zhangb'
12 
def hive_day_cid(create_date, type11_duration, db=None, execute=None):
    """Append per-provider, per-day cid counts for one date window to Hive.

    The window is the ``type11_duration`` days ending on ``create_date``
    (inclusive on both ends). The generated command creates
    ``hive_day_cid_provider`` if it does not exist and then inserts, for each
    provider and day in the window, the number of non-empty ``token_md5``
    values and the number of distinct ones, read from
    ``report_ods_mdp.upload_bi_type11``.

    Args:
        create_date: ``datetime.date`` that is the LAST day of the window.
        type11_duration: window length in days.
        db: Hive database to ``use``; defaults to the module-level ``db_name``.
        execute: truthy to run the command through the shell, falsy to only
            print it; defaults to the module-level ``flag_server``.

    Returns:
        The shell command string that was printed (and possibly executed).
    """
    # Resolve the defaults at call time so the module globals are only needed
    # when the caller does not pass explicit values.
    if db is None:
        db = db_name
    if execute is None:
        execute = flag_server

    print("# ---------------------------------------------------------------------------------- #")

    first_day = create_date - datetime.timedelta(days=type11_duration - 1)
    start_date_str = first_day.strftime("%Y%m%d")
    end_date_str = create_date.strftime("%Y%m%d")

    # "insert into" appends: running the same window twice duplicates rows.
    hive_command = ('''
hive -e " use %s;
 create table if not exists hive_day_cid_provider(provider string,day int, cnt_cid bigint,dist_cid bigint );

 insert into hive_day_cid_provider
 select a.provider,a.day,count(a.cid) as cnt_cid,count(distinct(a.cid)) as dist_cid from
 (select day,provider,token_md5 as cid from report_ods_mdp.upload_bi_type11
 where day >=%s and day <= %s and length(token_md5)>0 and provider in ('gps','network','passive','none') ) a
 group by a.provider,a.day

;"
    ''' % (db, start_date_str, end_date_str))

    print(hive_command)
    if execute:
        status = os.system(hive_command)
        # Surface a failed run instead of silently moving on to the next window.
        if status != 0:
            print("# WARNING: hive command exited with status %s" % status)
    print("\n")
    return hive_command


if __name__ == '__main__':

    start = time.time()
    business_name = 'brand48'
    # ----------------------------------------
    # Each window is identified by its LAST day (e.g. the 7th closes the
    # window 1st..7th). The end dates are generated arithmetically rather than
    # typed by hand: the previous hand-written list
    #   (11,7),(11,14),(11,28),(12,5),(12,12),(12,19),(12,26)
    # skipped 2016-11-21, so the week 11-15..11-21 was never loaded. Date
    # arithmetic also keeps working when the range crosses a year boundary.
    type11_duration = 7
    first_end_date = datetime.date(2016, 11, 7)
    last_end_date = datetime.date(2016, 12, 26)
    window_count = (last_end_date - first_end_date).days // type11_duration + 1

    for window in range(window_count):
        create_date = first_end_date + datetime.timedelta(days=type11_duration * window)
        hive_day_cid(create_date, type11_duration)
        print("\r")
        print('# Time:  %s' % datetime.timedelta(seconds=(time.time() - start)))
        print('# the end')
        print('\n')

    # hive_imei_time_list(create_date, type11_duration)
    print('# Time:  %s' % datetime.timedelta(seconds=(time.time() - start)))

    # Beintoo_day.hive_output(create_date, cnt_duration=7)

# Other ways to enumerate weekly dates, kept for reference.
#
#   # every 7 days after a fixed anchor date
#   for i in range(1, 30):
#       a = datetime.date(2016, 2, 23)
#       b = a + datetime.timedelta(7 * i)
#       print(b)
#
#   # crossing a year boundary, method 1: step a day offset through the range
#   date_begin = datetime.date(2016, 12, 1)
#   date_end = datetime.date(2017, 1, 10)
#   for i in range(0, (date_end - date_begin).days + 1, 7):
#       create_date = date_begin + datetime.timedelta(days=i)
#       print(create_date)
#
#   # crossing a year boundary, method 2: advance the date until it passes the end
#   date_begin = datetime.date(2016, 12, 1)
#   date_end = datetime.date(2017, 1, 10)
#   while date_begin <= date_end:
#       print(date_begin)
#       date_begin = date_begin + datetime.timedelta(days=7)

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM