-- 拉鏈表測試:
-- 有如下測試數據
-- 2019/12/1 號訂單的全量數據
-- id  status  create_time  operate_time
-- 1   待支付  2019-12-01
-- 2   待支付  2019-12-01
-- 3   已支付  2019-12-01
--
-- 2019/12/2 號訂單的全量數據
-- id  status  create_time  operate_time
-- 1   待支付  2019-12-01
-- 2   已支付  2019-12-01   2019-12-02
-- 3   已支付  2019-12-01
-- 4   待支付  2019-12-02
-- 5   已支付  2019-12-02
-- Order snapshot table. Each load below places one day's full order data
-- into its own dt partition. Input files are tab-delimited text.
drop table if exists order_info;

create table order_info (
    id           int,
    status       string,
    create_time  string,
    operate_time string
)
partitioned by (dt string)
row format delimited
    fields terminated by '\t';
-- Load the 2019-12-01 full snapshot into its partition.
-- The table is referenced unqualified, exactly as it is created and queried
-- elsewhere in this script, so every statement resolves against the same
-- (current) database instead of a hard-coded "chain" database.
load data local inpath "/opt/data/order_info1"
into table order_info partition (dt = '2019-12-01');

-- Load the 2019-12-02 full snapshot into its partition.
load data local inpath "/opt/data/order_info2"
into table order_info partition (dt = '2019-12-02');
-- Zipper (history) table: one row per version of an order.
-- start_time is the day the version became effective and end_time the last
-- day it was effective. The literal '9999-99-99' marks a version that is
-- still open.
drop table if exists order_info_chain;

create table order_info_chain (
    id           int,
    status       string,
    create_time  string,
    operate_time string,
    start_time   string,
    end_time     string
)
row format delimited
    fields terminated by '\t';

-- Initialise the zipper table from the 2019-12-01 snapshot.
-- Every order starts as an open version effective from that day.
insert overwrite table order_info_chain
select
    id,
    status,
    create_time,
    operate_time,
    '2019-12-01' as start_time,
    '9999-99-99' as end_time
from order_info
where dt = '2019-12-01';
-- Daily delta table: the orders that were created or modified on the
-- partition date. Dropped first so the script can be rerun from the top.
-- id is declared int to match order_info.id, which it is copied from.
drop table if exists order_change;

create table order_change (
    id           int,
    status       string,
    create_time  string,
    operate_time string
)
partitioned by (dt string);

-- Fill the 2019-12-02 delta. A row belongs to the delta when its create_time
-- or its operate_time equals that day.
-- The dt predicate restricts the scan to that day's snapshot partition.
-- Without it every snapshot partition is read, and an order created on
-- 2019-12-02 would be picked up again from each later snapshot that still
-- contains it, producing duplicate delta rows.
insert overwrite table order_change partition (dt = '2019-12-02')
select
    id,
    status,
    create_time,
    operate_time
from order_info
where dt = '2019-12-02'
  and (create_time = '2019-12-02' or operate_time = '2019-12-02');
-- Staging table with the same column layout as order_info_chain. The merged
-- history is written here first and then copied over the zipper table.
-- Dropped first so the script can be rerun from the top, as is done for the
-- other tables in this script.
-- id is declared int to match order_info_chain.id.
drop table if exists tmp_chain;

create table tmp_chain (
    id           int,
    status       string,
    create_time  string comment '創建時間',
    operate_time string comment '修改時間',
    start_time   string comment '有效開始時間',
    end_time     string comment '有效結束時間'
);
-- Build the zipper table contents as of 2019-12-02 into tmp_chain.
--
-- Branch 1 (before union all): every row of the 2019-12-02 delta becomes a
-- new open version, effective from 2019-12-02 with end_time '9999-99-99'.
--
-- Branch 2 (after union all): every existing zipper row is carried over.
-- The left join matches only rows that are still open (end_time '9999-99-99')
-- and whose id appears in the delta. Those rows are closed by setting
-- end_time to the day before the delta date. Unmatched rows, including
-- versions closed earlier, keep their end_time unchanged. The open-row test
-- must stay in the ON clause, because moving it to WHERE would drop the
-- already-closed history rows.
--
-- order_change.id is cast to int in both places it is read so that the two
-- union branches and the join key share the type of order_info_chain.id.
-- This avoids relying on implicit string/int coercion and makes the final
-- ordering by id numeric rather than lexicographic.
--
-- NOTE(review): running this twice for the same day would duplicate the
-- 2019-12-02 versions, so confirm the job is only run once per day.
insert overwrite table tmp_chain
select
    t.id,
    t.status,
    t.create_time,
    t.operate_time,
    t.start_time,
    t.end_time
from (
    select
        cast(id as int) as id,
        status,
        create_time,
        operate_time,
        '2019-12-02'    as start_time,
        '9999-99-99'    as end_time
    from order_change
    where dt = '2019-12-02'

    union all

    select
        orch.id,
        orch.status,
        orch.create_time,
        orch.operate_time,
        orch.start_time,
        if(ch.id is null, orch.end_time, date_add(ch.dt, -1)) as end_time
    from order_info_chain orch
    left join (
        select
            cast(id as int) as id,
            dt
        from order_change
        where dt = '2019-12-02'
    ) ch
        on  orch.id = ch.id
        and orch.end_time = '9999-99-99'
) t
order by t.id, t.start_time;
-- Publish: replace the contents of the zipper table with the rows staged
-- in tmp_chain. Columns are listed in table order.
insert overwrite table order_info_chain
select
    id,
    status,
    create_time,
    operate_time,
    start_time,
    end_time
from tmp_chain;