在hive上建表與普通分區表創建方法一樣;
1 CREATE TABLE `dwa_m_user_association_circle`( 2 `device_number` string, 3 `oppo_number` string, 4 `prov_id_oppo` string, 5 `area_id_oppo` string, 6 `dealer_oppo` string, 7 `short_call_nums` bigint, 8 `long3_call_nums` bigint, 9 `long5_call_nums` bigint, 10 `long10_call_nums` bigint, 11 `short_total_nums` bigint, 12 `long3_total_nums` bigint, 13 `long5_total_nums` bigint, 14 `long10_total_nums` bigint, 15 `area_id` string) 16 PARTITIONED BY ( 17 `month_id` string, 18 `prov_id` string, 19 `dealer` string) 20 ROW FORMAT DELIMITED 21 FIELDS TERMINATED BY '|' 22 NULL DEFINED AS '' 23 STORED AS INPUTFORMAT 24 'org.apache.hadoop.mapred.TextInputFormat' 25 OUTPUTFORMAT 26 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 27 LOCATION 28 'hdfs://beh/user/hive/warehouse/all_ana_pro.db/dwa_m_user_association_circle' 29 TBLPROPERTIES ( 30 'numPartitions'='248', 31 'numFiles'='648', 32 'transient_lastDdlTime'='1542952067', 33 'totalSize'='247584222644', 34 'numRows'='0', 35 'rawDataSize'='0')
在shell腳本中,需設置的參數:
set hive.exec.dynamic.partition=true; #開啟動態分區,默認是false
set hive.exec.dynamic.partition.mode=nostrict; #開啟允許所有分區都是動態的,否則必須要有靜態分區才能使用。
set hive.exec.max.created.files=1000000; #允許創建的最大文件數,當分區是2個或三個分區時,文件會被分成很多小文件,該設置就是將文件的最大數目設成100w;
1 insert overwrite table ALL_ANA_PRO.dwa_m_user_association_circle PARTITION (month_id=${v_month},prov_id,dealer) 2 select device_number, 3 oppo_number , 4 prov_id_oppo , 5 area_id_oppo , 6 dealer_oppo , 7 short_call_nums, 8 long3_call_nums, 9 long5_call_nums, 10 long10_call_nums, 11 sum(short_call_nums) over(distribute by device_number) short_total_nums, 12 sum(long3_call_nums ) over(distribute by device_number) long3_total_nums, 13 sum(long5_call_nums) over(distribute by device_number) long5_total_nums, 14 sum(long10_call_nums) over(distribute by device_number) long10_total_nums, 15 area_id , 16 prov_id , 17 dealer 18 from 19 (SELECT device_number, 20 prov_id , 21 area_id , 22 dealer , 23 oppo_number , 24 prov_id_oppo , 25 area_id_oppo , 26 dealer_oppo , 27 sum(case when t.bill_times < 60 then 1 else 0 end ) short_call_nums, 28 sum(case when t.bill_times >180 then 1 else 0 end ) long3_call_nums, 29 sum(case when t.bill_times >300 then 1 else 0 end ) long5_call_nums, 30 sum(case when t.bill_times >600 then 1 else 0 end ) long10_call_nums 31 FROM ( SELECT prov_id , 32 area_id , 33 device_number, 34 0 dealer , 35 prov_id_oppo , 36 area_id_oppo , 37 oppo_number , 38 dealer_oppo , 39 bill_times 40 FROM ALL_ANA_PRO.DWA_M_CALL_RING_BASE t 41 WHERE month_id = '${v_month}' 42 AND ticket_type = 1 43 AND dealer_oppo > -1 44 UNION ALL 45 SELECT prov_id_oppo prov_id , 46 area_id_oppo area_id , 47 oppo_number device_number , 48 dealer_oppo dealer , 49 prov_id prov_id_oppo , 50 area_id area_id_oppo , 51 device_number oppo_number , 52 0 dealer_oppo , 53 bill_times 54 FROM ALL_ANA_PRO.DWA_M_CALL_RING_BASE t 55 WHERE month_id = '${v_month}' 56 AND ticket_type = 1 57 AND dealer_oppo > -1 58 ) t 59 GROUP BY 60 device_number, 61 prov_id , 62 area_id , 63 dealer , 64 oppo_number , 65 prov_id_oppo , 66 area_id_oppo , 67 dealer_oppo 68 ) t;
這里,需要說明的是,向目標表插數:
insert overwrite table ALL_ANA_PRO.dwa_m_user_association_circle PARTITION (month_id=${v_month},prov_id,dealer)
其中select語句中動態分區prov_id,dealer兩個字段一般在最后面;