環境準備
集成 jar 包:將 hudi-hadoop-mr-bundle-0.10.1.jar 放入 $HIVE_HOME/lib 目錄下
建外部表
-- Create the db_hudi database and an external Hive table on top of the
-- dataset stored under /hudi-warehouse/tbl_didi_haikou.
-- Both statements use IF NOT EXISTS so the script can be re-run safely.
CREATE DATABASE IF NOT EXISTS db_hudi;
USE db_hudi;

-- The table is EXTERNAL: Hive only registers metadata for the files at
-- LOCATION. Rows are read through HoodieParquetInputFormat, so that class
-- must be available on Hive's classpath.
-- NOTE(review): ts and partitionpath look like the Hudi precombine and
-- partition-path fields of the writer job -- confirm against the writer.
CREATE EXTERNAL TABLE IF NOT EXISTS tbl_hudi_didi (
    order_id            BIGINT,
    product_id          INT,
    city_id             INT,
    district            INT,
    county              INT,
    type                INT,
    combo_type          INT,
    traffic_type        INT,
    passenger_count     INT,
    driver_product_id   INT,
    start_dest_distance INT,
    arrive_time         STRING,
    departure_time      STRING,
    pre_total_fee       DOUBLE,
    normal_time         STRING,
    bubble_trace_id     STRING,
    product_1level      INT,
    dest_lng            DOUBLE,
    dest_lat            DOUBLE,
    starting_lng        DOUBLE,
    starting_lat        DOUBLE,
    ts                  BIGINT,
    partitionpath       STRING
)
PARTITIONED BY (
    date_str STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS
    INPUTFORMAT 'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION '/hudi-warehouse/tbl_didi_haikou';
手動加入分區
-- Manually register one Hive partition per day directory of the dataset.
-- Each partition's LOCATION is given explicitly because the directories are
-- named by the bare date value (e.g. 2017-5-22).
-- IF NOT EXISTS makes every statement safe to re-run.
-- The comment lives on its own line: a "--" comment runs to end of line, so
-- it must never share a physical line with the statements it describes.
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-22')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-23')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-24')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-25')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-26')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-27')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-28')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-29')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-30')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-5-31')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-1')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-2')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-3')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-4')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-5')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-6')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-7')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-8')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-9')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9';
ALTER TABLE db_hudi.tbl_hudi_didi ADD IF NOT EXISTS PARTITION (date_str = '2017-6-10')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';
查看分區
-- List the partitions currently registered in the metastore for
-- db_hudi.tbl_hudi_didi, to verify the ADD PARTITION statements took effect.
SHOW PARTITIONS db_hudi.tbl_hudi_didi;

指標統計
-- Development / testing only: let Hive run small jobs in local mode instead
-- of submitting them to the cluster. The thresholds below bound which jobs
-- qualify (task count, total input bytes, number of input files).
SET hive.exec.mode.local.auto=true;
SET hive.exec.mode.local.auto.tasks.max=10;
SET hive.exec.mode.local.auto.inputbytes.max=88801103;
SET hive.exec.mode.local.auto.input.files.max=50;
-- Non-strict mode permits queries that scan a partitioned table without a
-- partition filter, which every query below does.
SET hive.mapred.mode=nonstrict;

-- Metric 1: number of orders per product line (product_id mapped to a label).
WITH product_counts AS (
    SELECT
        product_id,
        COUNT(1) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY product_id
)
SELECT
    CASE product_id
        WHEN 1 THEN "滴滴專車"
        WHEN 2 THEN "滴滴企業專車"
        WHEN 3 THEN "滴滴快車"
        WHEN 4 THEN "滴滴企業快車"
        ELSE "未知"
    END AS order_type,
    total
FROM product_counts;

-- Metric 2: number of orders by timeliness (type 0 = real-time, 1 = reserved).
WITH type_counts AS (
    SELECT
        type,
        COUNT(1) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY type
)
SELECT
    CASE type
        WHEN 0 THEN "實時"
        WHEN 1 THEN "預約"
        ELSE "未知"
    END AS order_type,
    total
FROM type_counts;

-- Metric 3: number of orders per traffic type.
SELECT
    traffic_type,
    COUNT(1) AS total
FROM db_hudi.tbl_hudi_didi
GROUP BY traffic_type;

-- Metric 5: order price distribution, counted per price bucket with
-- CASE WHEN inside SUM.
-- pre_total_fee is a DOUBLE, so the buckets are contiguous ranges
-- (upper bound inclusive): a fractional fare such as 15.5 lands in 16_30
-- rather than falling between two integer ranges and being dropped.
-- Whole-number fares are bucketed exactly as the column names suggest.
-- NULL or negative fees are counted in no bucket.
-- Aliases start with a digit, so they are backtick-quoted.
SELECT
    SUM(CASE WHEN pre_total_fee >= 0  AND pre_total_fee <= 15  THEN 1 ELSE 0 END) AS `0_15`,
    SUM(CASE WHEN pre_total_fee > 15  AND pre_total_fee <= 30  THEN 1 ELSE 0 END) AS `16_30`,
    SUM(CASE WHEN pre_total_fee > 30  AND pre_total_fee <= 50  THEN 1 ELSE 0 END) AS `31_50`,
    SUM(CASE WHEN pre_total_fee > 50  AND pre_total_fee <= 100 THEN 1 ELSE 0 END) AS `51_100`,
    SUM(CASE WHEN pre_total_fee > 100                          THEN 1 ELSE 0 END) AS `100_`
FROM db_hudi.tbl_hudi_didi;