shell腳本中向hive動態分區插入數據


在hive上建表與普通分區表創建方法一樣;

 -- Target table for the dynamic-partition insert example.
 -- It is declared like any ordinary partitioned Hive table: the partition
 -- columns (month_id, prov_id, dealer) appear only in PARTITIONED BY and are
 -- not repeated in the regular column list.
 --
 -- The original listing also carried TBLPROPERTIES entries (numPartitions,
 -- numFiles, totalSize, numRows, rawDataSize, transient_lastDdlTime). Those
 -- describe the state of an already-populated table rather than its
 -- definition (numRows was '0' next to a non-zero totalSize), so they are
 -- left out here to avoid seeding a new table with stale statistics.
 CREATE TABLE IF NOT EXISTS `dwa_m_user_association_circle` (
   `device_number`     string,
   `oppo_number`       string,
   `prov_id_oppo`      string,
   `area_id_oppo`      string,
   `dealer_oppo`       string,
   -- Per (device_number, oppo_number) call counters.
   `short_call_nums`   bigint,
   `long3_call_nums`   bigint,
   `long5_call_nums`   bigint,
   `long10_call_nums`  bigint,
   -- Per device_number totals of the counters above.
   `short_total_nums`  bigint,
   `long3_total_nums`  bigint,
   `long5_total_nums`  bigint,
   `long10_total_nums` bigint,
   `area_id`           string
 )
 PARTITIONED BY (
   `month_id` string,
   `prov_id`  string,
   `dealer`   string
 )
 -- Pipe-delimited text files; an empty field is read back as NULL.
 ROW FORMAT DELIMITED
   FIELDS TERMINATED BY '|'
   NULL DEFINED AS ''
 STORED AS INPUTFORMAT
   'org.apache.hadoop.mapred.TextInputFormat'
 OUTPUTFORMAT
   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
 LOCATION
   'hdfs://beh/user/hive/warehouse/all_ana_pro.db/dwa_m_user_association_circle';

 

在shell腳本中,需設置的參數: 

-- Session settings required before running a dynamic-partition insert.
-- Comments are kept on their own lines and use HiveQL's "--" syntax: text
-- following "#" after a statement is not a comment in HiveQL.

-- Turn dynamic partitioning on (the original note states the default is false).
set hive.exec.dynamic.partition=true;

-- Allow every partition column to be dynamic; in strict mode at least one
-- static partition must be supplied. The documented value is "nonstrict";
-- the original listing spelled it "nostrict".
set hive.exec.dynamic.partition.mode=nonstrict;

-- Raise the cap on the number of files a job may create to 1,000,000.
-- With two or three partition levels the output is split into many small
-- files, so a low limit can be exceeded.
set hive.exec.max.created.files=1000000;

 -- Loads one month of ALL_ANA_PRO.dwa_m_user_association_circle using a
 -- dynamic-partition insert.
 --
 -- ${v_month} is a shell variable; the calling script substitutes it before
 -- Hive parses the statement. It is quoted everywhere because month_id is a
 -- string column (the original left it unquoted in the PARTITION clause only).
 --
 -- month_id is a static partition. prov_id and dealer are dynamic: Hive takes
 -- their values by position from the last two columns of the outer SELECT,
 -- in the same order as they are listed in the PARTITION clause.
 INSERT OVERWRITE TABLE ALL_ANA_PRO.dwa_m_user_association_circle
 PARTITION (month_id = '${v_month}', prov_id, dealer)
 SELECT
     pair_stats.device_number,
     pair_stats.oppo_number,
     pair_stats.prov_id_oppo,
     pair_stats.area_id_oppo,
     pair_stats.dealer_oppo,
     pair_stats.short_call_nums,
     pair_stats.long3_call_nums,
     pair_stats.long5_call_nums,
     pair_stats.long10_call_nums,
     -- Totals across all counterpart numbers of the same device_number.
     SUM(pair_stats.short_call_nums)  OVER (PARTITION BY pair_stats.device_number) AS short_total_nums,
     SUM(pair_stats.long3_call_nums)  OVER (PARTITION BY pair_stats.device_number) AS long3_total_nums,
     SUM(pair_stats.long5_call_nums)  OVER (PARTITION BY pair_stats.device_number) AS long5_total_nums,
     SUM(pair_stats.long10_call_nums) OVER (PARTITION BY pair_stats.device_number) AS long10_total_nums,
     pair_stats.area_id,
     -- Dynamic partition columns: must stay last and in PARTITION-clause order.
     pair_stats.prov_id,
     pair_stats.dealer
 FROM (
     -- One row per (device_number, oppo_number) pair with call counters
     -- bucketed by bill_times. The ">" buckets overlap by design of the
     -- original query: a record with bill_times > 600 is counted in the
     -- long3, long5 and long10 counters alike.
     -- NOTE(review): bill_times is presumably a duration in seconds
     -- (60 / 180 / 300 / 600) -- confirm against the base table.
     SELECT
         calls.device_number,
         calls.prov_id,
         calls.area_id,
         calls.dealer,
         calls.oppo_number,
         calls.prov_id_oppo,
         calls.area_id_oppo,
         calls.dealer_oppo,
         SUM(CASE WHEN calls.bill_times < 60  THEN 1 ELSE 0 END) AS short_call_nums,
         SUM(CASE WHEN calls.bill_times > 180 THEN 1 ELSE 0 END) AS long3_call_nums,
         SUM(CASE WHEN calls.bill_times > 300 THEN 1 ELSE 0 END) AS long5_call_nums,
         SUM(CASE WHEN calls.bill_times > 600 THEN 1 ELSE 0 END) AS long10_call_nums
     FROM (
         -- Branch 1: records as stored, with dealer fixed to 0.
         SELECT
             base.prov_id,
             base.area_id,
             base.device_number,
             0 AS dealer,
             base.prov_id_oppo,
             base.area_id_oppo,
             base.oppo_number,
             base.dealer_oppo,
             base.bill_times
         FROM ALL_ANA_PRO.DWA_M_CALL_RING_BASE AS base
         WHERE base.month_id = '${v_month}'
           AND base.ticket_type = 1
           AND base.dealer_oppo > -1

         UNION ALL

         -- Branch 2: the same records with both sides swapped, so the
         -- counterpart number becomes device_number; dealer is taken from
         -- dealer_oppo and dealer_oppo is fixed to 0. The WHERE filter
         -- applies to the base table's dealer_oppo, not the projected 0.
         SELECT
             base.prov_id_oppo  AS prov_id,
             base.area_id_oppo  AS area_id,
             base.oppo_number   AS device_number,
             base.dealer_oppo   AS dealer,
             base.prov_id       AS prov_id_oppo,
             base.area_id       AS area_id_oppo,
             base.device_number AS oppo_number,
             0                  AS dealer_oppo,
             base.bill_times
         FROM ALL_ANA_PRO.DWA_M_CALL_RING_BASE AS base
         WHERE base.month_id = '${v_month}'
           AND base.ticket_type = 1
           AND base.dealer_oppo > -1
     ) AS calls
     GROUP BY
         calls.device_number,
         calls.prov_id,
         calls.area_id,
         calls.dealer,
         calls.oppo_number,
         calls.prov_id_oppo,
         calls.area_id_oppo,
         calls.dealer_oppo
 ) AS pair_stats;

 這里,需要說明的是,向目標表插數:

insert overwrite table ALL_ANA_PRO.dwa_m_user_association_circle  PARTITION (month_id=${v_month},prov_id,dealer)
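其中,動態分區字段 prov_id、dealer 必須放在 select 列表的最後,且順序要與 PARTITION 子句中的順序一致,因為 Hive 是按位置而不是按字段名來對應動態分區列的;靜態分區 month_id 已在 PARTITION 子句中賦值,不需要出現在 select 列表中。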

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM