注意:中間只斷一天也算連續登錄(即相鄰兩次登錄日期相差不超過 2 天)
1、建表語句
-- Login log used by the consecutive-login queries in this note.
-- Stored as delimited text with a tab character between the two fields.
create table demo (
    id string,  -- user identifier
    dt string   -- login date kept as 'yyyy-MM-dd' text
)
row format delimited
fields terminated by '\t';
2、數據準備
1001 2021-08-01 1001 2021-08-02 1001 2021-08-03 1001 2021-08-05 1001 2021-08-06 1001 2021-08-07 1001 2021-08-10 1001 2021-08-12
3、數據導入
-- Load the sample rows from a file on the local (client-side) filesystem
-- into demo; without OVERWRITE the rows are added to whatever is already there.
load data local inpath '/home/hadoop/demo.txt'
into table demo;
4、sql 語句

-- Report every login streak per user that spans more than 6 days, where a
-- single missed day does not break the streak.
with prev_login as (
    -- Pair each login date with the same user's previous login date; a user's
    -- first login falls back to the sentinel '1970-01-01'.
    select
        id,
        dt,
        lag(dt, 1, '1970-01-01') over (partition by id order by dt) as lag_dt
    from demo
),
login_gap as (
    -- Number of days between a login and the one before it.
    select
        id,
        dt,
        datediff(dt, lag_dt) as dt_diff
    from prev_login
),
streak as (
    -- Running count of gaps longer than 2 days; rows of one user that share
    -- the same flag value belong to the same streak.
    select
        id,
        dt,
        sum(case when dt_diff > 2 then 1 else 0 end)
            over (partition by id order by dt) as flag
    from login_gap
)
select
    id,
    flag,
    datediff(max(dt), min(dt)) + 1 as days  -- first-to-last span, inclusive
from streak
group by
    id,
    flag
having datediff(max(dt), min(dt)) + 1 > 6
5、sql 解析
首先將日期下移一位,用於得到當前日期和上一個日期之間相差的天數
-- Step 1: for every login row, look up the same user's previous login date
-- and compute how many days lie between the two.
with prev_login as (
    -- A user's first login has no predecessor, so lag() falls back to the
    -- sentinel '1970-01-01'.
    select
        id,
        dt,
        lag(dt, 1, '1970-01-01') over (partition by id order by dt) as lag_dt
    from demo
)
select
    id,
    dt,
    lag_dt,
    datediff(dt, lag_dt) as dt_diff
from prev_login
將相差天數大於 2 的記錄標記為 1(其餘為 0),再按日期順序累計求和得到分段標識 flag;flag 相同的記錄屬於同一段連續登錄
-- Step 2: tag every login row with a streak identifier (flag).
-- Each gap of more than 2 days starts a new streak, so the running count of
-- such gaps is constant within a streak and increases at every break.
-- Fix: the explanatory comment used to sit mid-statement on a single line,
-- which commented out the entire FROM clause; it now has a line of its own.
with prev_login as (
    -- Previous login date per user; the first login uses the sentinel
    -- '1970-01-01'.
    select
        id,
        dt,
        lag(dt, 1, '1970-01-01') over (partition by id order by dt) as lag_dt
    from demo
),
login_gap as (
    -- Number of days between a login and the one before it.
    select
        id,
        dt,
        lag_dt,
        datediff(dt, lag_dt) as dt_diff
    from prev_login
)
select
    id,
    dt,
    lag_dt,
    dt_diff,
    -- Adjust the threshold in "dt_diff > 2" to match whichever definition of
    -- "consecutive" is required.
    sum(case when dt_diff > 2 then 1 else 0 end)
        over (partition by id order by dt) as flag
from login_gap
按 id 和 flag 分組聚合,計算每段連續登錄的天數(該段最晚日期與最早日期之差加 1)
-- Step 3: collapse each streak to one row and report its length in days,
-- measured from its first to its last login date, inclusive.
with prev_login as (
    -- Previous login date per user; the first login uses the sentinel
    -- '1970-01-01'.
    select
        id,
        dt,
        lag(dt, 1, '1970-01-01') over (partition by id order by dt) as lag_dt
    from demo
),
login_gap as (
    -- Number of days between a login and the one before it.
    select
        id,
        dt,
        datediff(dt, lag_dt) as dt_diff
    from prev_login
),
streak as (
    -- Running count of gaps longer than 2 days, used as the streak identifier.
    select
        id,
        dt,
        sum(case when dt_diff > 2 then 1 else 0 end)
            over (partition by id order by dt) as flag
    from login_gap
)
select
    id,
    flag,
    datediff(max(dt), min(dt)) + 1 as days
from streak
group by
    id,
    flag