求某段时间内用户的连续活跃区间
Posted fullfresh
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了求某段时间内用户的连续活跃区间相关的知识,希望对你有一定的参考价值。
-- Recreate the demo table from scratch. IF EXISTS states explicitly that a
-- missing table is fine, so the script does not rely on the
-- hive.exec.drop.ignorenonexistent setting to be re-runnable.
drop table if exists user_active;
-- Each row holds a uid and a dt string (the sample rows use 'yyyy-MM-dd').
-- Comma-delimited text storage matches the layout of the sample data file.
create table user_active(uid String,dt String)
row format delimited fields terminated by ',' stored as textfile;
--原始数据
spark,2021-01-01
spark,2021-01-02
spark,2021-01-03
spark,2021-01-06
spark,2021-01-07
spark,2021-01-09
hive,2021-01-01
hive,2021-01-03
hive,2021-01-10
hive,2021-01-11
hive,2021-01-12
hive,2021-01-13
-- Load the sample file from the local filesystem into user_active;
-- OVERWRITE replaces whatever the table currently contains.
load data local inpath '/root/user_active.txt' overwrite into table user_active;
-- Consecutive-activity ranges per user ("gaps and islands").
--
-- Within one user, days are numbered in date order (rn). For a run of
-- consecutive days, dt and rn both grow by 1 per row, so date_sub(dt, rn)
-- stays constant across the run and changes as soon as a day is skipped.
-- Grouping by (uid, date_sub(dt, rn)) therefore yields one row per streak.
--
-- Output columns:
--   uid         user id
--   sub         grouping key (dt - rn); only meaningful as a streak identifier
--   start_dt    first active day of the streak
--   active_days number of active days in the streak
--   end_dt      last active day of the streak
select
    t.uid,
    date_sub(t.dt, t.rn) as sub,
    min(t.dt) as start_dt,
    count(1) as active_days,
    max(t.dt) as end_dt
from
(
    select
        d.uid,
        d.dt,
        row_number() over(partition by d.uid order by d.dt) as rn
    from
    (
        -- Collapse repeated (uid, dt) rows first: a duplicate day would get
        -- its own row number, shift dt - rn for every later row and split
        -- one streak into several (and inflate the day count).
        select distinct
            uid,
            dt
        from
            user_active
        -- Half-open date window; dt is a 'yyyy-MM-dd' string, so string
        -- comparison orders the same way as the calendar.
        where dt>='2021-01-01' and dt<'2021-01-30'
    ) d
) t
group by t.uid, date_sub(t.dt, t.rn);
--子查询结果
uid dt rn
hive 2021-01-01 1
hive 2021-01-03 2
hive 2021-01-10 3
hive 2021-01-11 4
hive 2021-01-12 5
hive 2021-01-13 6
spark 2021-01-01 1
spark 2021-01-02 2
spark 2021-01-03 3
spark 2021-01-06 4
spark 2021-01-07 5
spark 2021-01-09 6
-- Intermediate view of the streak key: for every activity row, show the
-- user and dt minus its per-user row number. Rows of the same user that
-- share a "sub" value belong to the same run of consecutive days.
with ranked as (
    -- Number each user's rows in date order inside the date window.
    select
        uid,
        dt,
        row_number() over(partition by uid order by dt) as rn
    from user_active
    where dt>='2021-01-01' and dt<'2021-01-30'
)
-- Alias kept as "t" so the qualified column header stays t.uid.
select
    t.uid,
    date_sub(t.dt,t.rn) as sub
from ranked t
;
t.uid sub
hive 2020-12-31
hive 2021-01-01
hive 2021-01-07
hive 2021-01-07
hive 2021-01-07
hive 2021-01-07
spark 2020-12-31
spark 2020-12-31
spark 2020-12-31
spark 2021-01-02
spark 2021-01-02
spark 2021-01-03
--最终结果
--uid --sub(分组键) --活跃区间开始日期 --连续活跃天数 --活跃区间截止日期
hive 2020-12-31 2021-01-01 1 2021-01-01
hive 2021-01-01 2021-01-03 1 2021-01-03
hive 2021-01-07 2021-01-10 4 2021-01-13
spark 2020-12-31 2021-01-01 3 2021-01-03
spark 2021-01-02 2021-01-06 2 2021-01-07
spark 2021-01-03 2021-01-09 1 2021-01-09
以上是关于求某段时间内用户的连续活跃区间的主要内容,如果未能解决你的问题,请参考以下文章