拉鏈表需求:
1.數據量比較大
2.變化的比例和頻率比較小,例如客戶的住址信息、聯繫方式等。比如有1千萬條用戶數據,若每天全量存儲,會重複存儲大量不變的信息,浪費存儲空間,因此可以使用拉鏈表的算法來節省存儲空間。
3.拉鏈歷史表,既能反映每個客戶不同時間的不同狀態,也可查看某個時間點的全量快照信息
拉鏈表設計

設計的拉鏈歷史表:

反映A客戶的歷史狀態信息:
-- All rows stored for customer A in the zipper table, one per validity interval.
-- Columns are listed explicitly so the result shape does not change silently
-- if ods_account gains columns later.
select
    cst_id,
    bal,
    eff_date,
    end_date,
    job_seq_id
from ods_account
where cst_id = 'A';

反映20190601歷史全量數據:
-- Snapshot as of 20190601: rows whose validity interval [eff_date, end_date)
-- contains that date (eff_date inclusive, end_date exclusive).
-- The dates are compared as strings; with YYYYMMDD values this matches
-- chronological order.
select
    cst_id,
    bal,
    eff_date,
    end_date,
    job_seq_id
from ods_account
where eff_date <= '20190601'
  and end_date > '20190601';

反映20190602歷史全量數據:
-- Snapshot as of 20190602: rows whose validity interval [eff_date, end_date)
-- contains that date (eff_date inclusive, end_date exclusive).
-- The dates are compared as strings; with YYYYMMDD values this matches
-- chronological order.
select
    cst_id,
    bal,
    eff_date,
    end_date,
    job_seq_id
from ods_account
where eff_date <= '20190602'
  and end_date > '20190602';

建表:
-- Schema for the zipper-table demo in database edw.
-- Every table is dropped and recreated, so running this block discards any
-- data already stored in these four tables.
--
-- bal is declared decimal(18,2) rather than float: float is an approximate
-- type, so equality comparisons on balances can give unexpected results.
-- NOTE(review): precision/scale (18,2) is an assumption about the source
-- data; confirm before loading real balances.
use edw;

-- Source data: one balance row per customer per date_id.
drop table if exists src_account;
create table if not exists src_account (
    cst_id  varchar(64)   comment '客戶唯一編號',
    bal     decimal(18,2) comment '余額',
    date_id varchar(16)   comment '日期'
) engine=InnoDB default charset=utf8;
alter table src_account add primary key (cst_id, date_id);

-- Delta rows keyed by customer and ETL flag.
-- NOTE(review): presumably holds the per-batch changes; the etl_flag values
-- are not visible in this part of the file.
drop table if exists delta_account;
create table if not exists delta_account (
    cst_id   varchar(64)   comment '客戶唯一編號',
    bal      decimal(18,2) comment '余額',
    etl_flag varchar(16)   comment 'ETL標記'
) engine=InnoDB default charset=utf8;
alter table delta_account add primary key (cst_id, etl_flag);

-- Same columns as ods_account plus new_job_seq_id; keyed by customer and
-- latest batch number.
-- NOTE(review): presumably keeps rows displaced from ods_account; confirm
-- against the load procedure.
drop table if exists odshis_account;
create table if not exists odshis_account (
    cst_id         varchar(64)   comment '客戶唯一編號',
    bal            decimal(18,2) comment '余額',
    eff_date       varchar(16)   comment '生效日期',
    end_date       varchar(16)   comment '失效日期',
    job_seq_id     varchar(16)   comment '批次號',
    new_job_seq_id varchar(16)   comment '最新批次號'
) engine=InnoDB default charset=utf8;
alter table odshis_account add primary key (cst_id, new_job_seq_id);

-- Zipper table: one row per customer per validity interval (eff_date, end_date).
drop table if exists ods_account;
create table if not exists ods_account (
    cst_id     varchar(64)   comment '客戶唯一編號',
    bal        decimal(18,2) comment '余額',
    eff_date   varchar(16)   comment '生效日期',
    end_date   varchar(16)   comment '失效日期',
    job_seq_id varchar(16)   comment '批次號'
) engine=InnoDB default charset=utf8;
alter table ods_account add primary key (cst_id, eff_date, end_date);
加載原始數據:
delete from src_account; insert into src_account values('A','20','20190601'); insert into src_account values('B','30','20190601'); insert into src_account values('C','50','20190601'); insert into src_account values('D','35','20190601'); insert into src_account values('A','20','20190602'); insert into src_account values('B','50','20190602'); insert into src_account values('D','20','20190602'); insert into src_account values('E','50','20190602'); insert into src_account values('A','30','20190603'); insert into src_account values('B','50','20190603'); insert into src_account values('C','50','20190603'); insert into src_account values('D','20','20190603'); insert into src_account values('A','30','20190604'); insert into src_account values('B','40','20190604'); insert into src_account values('C','30','20190604'); insert into src_account values('D','20','20190604'); insert into src_account values('E','20','20190604'); insert into src_account values('F','20','20190604'); insert into src_account values('G','20','
