Hive拉鏈表實現


拉鏈表測試:

有如下測試數據

--2019/12/1號訂單的全量數據
id    status    create_time    operate_time
1    待支付    2019-12-01    
2    待支付    2019-12-01    
3    已支付    2019-12-01    

--2019/12/2號訂單的全量數據
id    status    create_time    operate_time
1    待支付    2019-12-01    
2    已支付    2019-12-01    2019-12-02
3    已支付    2019-12-01    
4    待支付    2019-12-02    
5    已支付    2019-12-02    

 

-- Order source table. Each dt partition holds the orders loaded for that day
-- (the walkthrough loads one full snapshot per day). Files are tab-separated.
drop table if exists order_info;
create table order_info (
    id           int,
    status       string,
    create_time  string,
    operate_time string
)
partitioned by (dt string)
row format delimited
    fields terminated by '\t';

 

-- Load the 2019-12-01 snapshot file into its own partition.
-- The table is referenced unqualified, like every other statement in this
-- script, so the load targets the same order_info that was just created.
-- `overwrite` replaces the partition contents, so re-running the load does
-- not append a second copy of the same snapshot.
load data local inpath "/opt/data/order_info1" overwrite into table order_info partition(dt='2019-12-01');

-- Load the 2019-12-02 snapshot file into its own partition.
load data local inpath "/opt/data/order_info2" overwrite into table order_info partition(dt='2019-12-02');

 

-- Zipper (history) table: one row per version of an order.
-- start_time / end_time bound the period during which that version is valid.
drop table if exists order_info_chain;
create table order_info_chain (
    id           int,
    status       string,
    create_time  string,
    operate_time string,
    start_time   string,
    end_time     string
)
row format delimited
    fields terminated by '\t';

-- Seed the zipper table from the 2019-12-01 partition of order_info.
-- Every seeded row is valid from 2019-12-01 and is marked as still open with
-- the sentinel end_time '9999-99-99' (a marker string, not a real calendar
-- date; later steps match on it literally).
insert overwrite table order_info_chain
select
    snap.id,
    snap.status,
    snap.create_time,
    snap.operate_time,
    '2019-12-01' as start_time,
    '9999-99-99' as end_time
from order_info snap
where snap.dt = '2019-12-01';

 

-- Order change table: per-day set of orders that were created or modified
-- on that day, partitioned by the day (dt).
-- `drop table if exists` keeps the script re-runnable, matching the other
-- tables it creates; id is int so it agrees with order_info and
-- order_info_chain instead of relying on implicit string/int conversion.
drop table if exists order_change;
create table order_change (
    id           int,
    status       string,
    create_time  string,
    operate_time string
)
partitioned by (dt string);

-- Populate the 2019-12-02 change set: orders created or modified on that
-- day, detected via create_time and operate_time.
-- order_info holds one full snapshot per dt partition, so the read is pinned
-- to the 2019-12-02 partition; without that filter every partition is
-- scanned and an order matching the predicate in several snapshots would be
-- emitted more than once.
insert overwrite table order_change partition (dt = '2019-12-02')
select
    id,
    status,
    create_time,
    operate_time
from order_info
where dt = '2019-12-02'
  and (create_time = '2019-12-02' or operate_time = '2019-12-02');

 

-- Staging table for the rebuilt zipper history; same column layout as
-- order_info_chain, whose contents are later replaced from this table.
-- `drop table if exists` keeps the script re-runnable, matching the other
-- tables it creates; id is int so it agrees with order_info_chain.
drop table if exists tmp_chain;
create table tmp_chain (
    id           int,
    status       string,
    create_time  string COMMENT '創建時間',
    operate_time string COMMENT '修改時間',
    start_time   string COMMENT '有效開始時間',
    end_time     string COMMENT '有效結束時間'
);

 

-- Rebuild the zipper history for 2019-12-02 into the staging table.
-- The result is the union of:
--   1. the 2019-12-02 change set, inserted as new open versions, and
--   2. all existing history rows, where an open row whose order appears in
--      the change set is closed as of the day before the change.
insert overwrite table tmp_chain
select
    merged.id,
    merged.status,
    merged.create_time,
    merged.operate_time,
    merged.start_time,
    merged.end_time
from (
    -- New versions: valid from 2019-12-02, still open (sentinel end_time).
    select
        chg.id,
        chg.status,
        chg.create_time,
        chg.operate_time,
        '2019-12-02' as start_time,
        '9999-99-99' as end_time
    from order_change chg
    where chg.dt = '2019-12-02'

    union all

    -- Existing history: only rows that are still open are matched against
    -- the change set (see the join condition); matched rows get
    -- end_time = change day - 1, every other row keeps its end_time.
    select
        hist.id,
        hist.status,
        hist.create_time,
        hist.operate_time,
        hist.start_time,
        case
            when chg.id is null then hist.end_time
            else date_sub(chg.dt, 1)
        end as end_time
    from order_info_chain hist
    left join (
        select
            id,
            dt
        from order_change
        where dt = '2019-12-02'
    ) chg
        on hist.id = chg.id
       and hist.end_time = '9999-99-99'
) merged
order by merged.id, merged.start_time;

 

-- Replace the zipper table's contents with the rebuilt history from the
-- staging table. Columns are listed in tmp_chain's declared order, which is
-- also order_info_chain's column order.
insert overwrite table order_info_chain
select
    staged.id,
    staged.status,
    staged.create_time,
    staged.operate_time,
    staged.start_time,
    staged.end_time
from tmp_chain staged;

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM