求某段时间内用户的连续活跃区间

Posted fullfresh

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了求某段时间内用户的连续活跃区间相关的知识,希望对你有一定的参考价值。

-- Recreate the demo table from scratch. `if exists` keeps the script
-- re-runnable on a fresh database regardless of how
-- hive.exec.drop.ignorenonexistent is configured.
drop table if exists user_active;
-- One row per activity record: uid is the user id, dt is the activity date
-- stored as a 'yyyy-MM-dd' string. Rows come from a comma-delimited text file.
create table user_active(uid String,dt String)
row format delimited fields terminated by ',' stored as textfile;

--原始数据(/root/user_active.txt 的内容,每行格式:uid,dt)
spark,2021-01-01
spark,2021-01-02
spark,2021-01-03
spark,2021-01-06
spark,2021-01-07
spark,2021-01-09
hive,2021-01-01
hive,2021-01-03
hive,2021-01-10
hive,2021-01-11
hive,2021-01-12
hive,2021-01-13

-- Load the sample file from the client's local filesystem into user_active.
-- `overwrite` replaces whatever the table currently holds.
load data local inpath '/root/user_active.txt'
overwrite into table user_active;

-- Continuous-activity ranges per user inside the window
-- [2021-01-01, 2021-01-30) (gaps-and-islands technique).
--
-- Idea: number each user's active days in date order. Within a run of
-- consecutive days both dt and rn grow by 1 per row, so date_sub(dt, rn)
-- stays constant; a gap in dt shifts that value. Grouping by (uid, sub)
-- therefore yields one row per streak.
--
-- Output columns:
--   uid         user id
--   sub         streak key (dt - rn); only meaningful as a grouping key
--   start_dt    first active day of the streak
--   active_days number of days in the streak
--   end_dt      last active day of the streak
with active_day as (
    -- Collapse duplicate (uid, dt) rows first: a user with several records
    -- on the same day would otherwise get several row numbers for that day,
    -- which splits the streak and inflates the day count.
    select
        uid,
        dt
    from user_active
    where dt >= '2021-01-01'
      and dt < '2021-01-30'
    group by uid, dt
),
ranked as (
    select
        uid,
        dt,
        row_number() over (partition by uid order by dt) as rn
    from active_day
)
select
    t.uid,
    date_sub(t.dt, t.rn) as sub,
    min(t.dt) as start_dt,
    count(1) as active_days,
    max(t.dt) as end_dt
from ranked t
group by t.uid, date_sub(t.dt, t.rn)
-- Make the result order deterministic: streaks in chronological order per user.
order by uid, start_dt;

--子查询结果
uid     dt             rn
hive    2021-01-01      1
hive    2021-01-03      2
hive    2021-01-10      3
hive    2021-01-11      4
hive    2021-01-12      5
hive    2021-01-13      6
spark   2021-01-01      1
spark   2021-01-02      2
spark   2021-01-03      3
spark   2021-01-06      4
spark   2021-01-07      5
spark   2021-01-09      6


-- Preview of the streak key before aggregation: for every active day in
-- [2021-01-01, 2021-01-30), show dt minus its per-user row number.
-- Rows of the same user that share a `sub` value belong to one
-- run of consecutive days.
with t as (
    select
        uid,
        dt,
        row_number() over (partition by uid order by dt) as rn
    from user_active
    where dt >= '2021-01-01'
      and dt < '2021-01-30'
)
select
    t.uid,
    date_sub(t.dt, t.rn) as sub
from t;

t.uid   sub
hive    2020-12-31
hive    2021-01-01
hive    2021-01-07
hive    2021-01-07
hive    2021-01-07
hive    2021-01-07

spark   2020-12-31
spark   2020-12-31
spark   2020-12-31
spark   2021-01-02
spark   2021-01-02
spark   2021-01-03

--最终结果(同一用户的连续日期,其 dt 减去行号 rn 得到的 sub 相同,因此按 uid、sub 分组后每组即为一段连续活跃区间)
--uid    --sub         --区间开始时间   --活跃天数   --区间截止时间
hive    2020-12-31      2021-01-01      1       2021-01-01
hive    2021-01-01      2021-01-03      1       2021-01-03
hive    2021-01-07      2021-01-10      4       2021-01-13

spark   2020-12-31      2021-01-01      3       2021-01-03
spark   2021-01-02      2021-01-06      2       2021-01-07
spark   2021-01-03      2021-01-09      1       2021-01-09

以上是关于求某段时间内用户的连续活跃区间的主要内容,如果未能解决你的问题,请参考以下文章:

最近七天内连续三天活跃用户数

面试题: Hive-SQL查询连续活跃登录用户思路详解

面试题: Hive-SQL查询连续活跃登录用户思路详解

面试题: Hive-SQL查询连续活跃登录用户思路详解

hive中如何快速查询区间留存

大数据 SQL 题目