第2节 网站点击流项目(下):5访客分析
Posted mediocreworld
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了第2节 网站点击流项目(下):5访客分析相关的知识,希望对你有一定的参考价值。
-- 独立访客
--需求:按照时间维度来统计独立访客及其产生的pv量
-- 按照时间维度(例如小时)统计独立访客及其产生的 pv 量。
-- 时间维度:时
-- Hourly distinct-visitor table: one row per (visitor IP, hour bucket) holding
-- that visitor's page-view count for the bucket.
-- IF EXISTS makes the drop explicit about a missing table instead of relying on
-- session configuration, so the script can be re-run from a clean warehouse.
drop table if exists dw_user_dstc_ip_h;
create table dw_user_dstc_ip_h (
    remote_addr string,  -- visitor IP address
    pvs         bigint,  -- page views by this visitor within the hour bucket
    hour        string   -- hour bucket, built as concat(month, day, hour) by the loader
);
-- Populate the hourly table for the 20130918 partition: page views per visitor
-- IP per hour bucket. The bucket key is month, day and hour concatenated.
-- The partition literal uses plain ASCII single quotes; typographic quotes are
-- not valid string delimiters.
insert into table dw_user_dstc_ip_h
select
    remote_addr,
    count(1) as pvs,
    concat(month, day, hour) as hour
from ods_weblog_detail
where datestr = '20130918'
group by
    concat(month, day, hour),
    remote_addr;
-- Building on the hourly table: total number of distinct visitors per hour.
-- The table is loaded with one row per (hour, remote_addr) group, so counting
-- rows per hour counts visitors.
select
    count(*) as dstc_ip_cnts,
    hour
from dw_user_dstc_ip_h
group by hour;
-- 时间维度:日
-- Daily dimension: page views per visitor IP per day for the 20130918 partition.
-- The day key is month and day concatenated.
-- The partition literal uses plain ASCII single quotes; typographic quotes are
-- not valid string delimiters.
select
    remote_addr,
    count(1) as counts,
    concat(month, day) as day
from ods_weblog_detail
where datestr = '20130918'
group by
    concat(month, day),
    remote_addr;
-- 时间维度:月
-- Monthly dimension: page views per visitor IP per month.
-- No datestr filter is applied, so every loaded day contributes to the counts.
select
    remote_addr,
    count(*) as counts,
    month
from ods_weblog_detail
group by
    remote_addr,
    month;
----------------------------------------------------------------------------------------
-- 每日新访客
-- 需求:将每天的新访客统计出来。
-- Cumulative de-duplicated visitor history: visitor IPs together with the day
-- value they were recorded under, partitioned by load date.
-- NOTE(review): dropping this table discards all previously accumulated
-- visitors; this DDL looks intended for initial setup only, not for the daily
-- run — confirm before scheduling.
-- IF EXISTS makes the drop explicit about a missing table instead of relying on
-- session configuration.
drop table if exists dw_user_dsct_history;
create table dw_user_dsct_history (
    day string,  -- day the visitor was recorded as new
    ip  string   -- visitor IP address
)
partitioned by (datestr string);
-- Daily new-visitor table: one partition per load date holding the IPs that
-- were not yet present in the history table on that date.
-- IF EXISTS makes the drop explicit about a missing table instead of relying on
-- session configuration.
drop table if exists dw_user_new_d;
create table dw_user_new_d (
    day string,  -- day the visitor was first seen
    ip  string   -- visitor IP address
)
partitioned by (datestr string);
-- Preview of the day's new visitors: IPs seen in the 20130918 partition that
-- have no matching row in the history table (anti-join via LEFT JOIN + IS NULL).
-- String literals use plain ASCII single quotes; typographic quotes are not
-- valid string delimiters. The unused inner table alias was removed.
select
    a.remote_addr,
    a.day
from (
    -- one row per distinct visitor IP for the day
    select
        remote_addr,
        '20130918' as day
    from ods_weblog_detail
    where datestr = '20130918'
    group by remote_addr
) a
left join dw_user_dsct_history hist
    on a.remote_addr = hist.ip
where hist.ip is null;
-- Insert the day's new visitors into dw_user_new_d: distinct IPs from the
-- 20130918 partition that have no matching row in the history table.
-- The extra wrapping subquery of the earlier version only re-exposed the join
-- columns, so the anti-join filter is applied directly on the join result.
-- String literals use plain ASCII single quotes; typographic quotes are not
-- valid string delimiters.
insert into table dw_user_new_d partition (datestr = '20130918')
select
    today.day as day,
    today.remote_addr as ip
from (
    -- one row per distinct visitor IP for the day
    select distinct
        remote_addr,
        '20130918' as day
    from ods_weblog_detail
    where datestr = '20130918'
) today
left join dw_user_dsct_history hist
    on today.remote_addr = hist.ip
where hist.ip is null;
-- Append the day's new visitors to the cumulative history table.
-- String literals use plain ASCII single quotes; typographic quotes are not
-- valid string delimiters.
insert into table dw_user_dsct_history partition (datestr = '20130918')
select
    day,
    ip
from dw_user_new_d
where datestr = '20130918';
-- 验证:
-- Sanity checks: distinct visitor IPs in the source table, then the rows
-- recorded for 20130918 in the history table and in the new-visitor table.
-- NOTE(review): the first query has no datestr filter, so it presumably matches
-- the other two only while ods_weblog_detail holds just this one day — confirm.
-- String literals use plain ASCII single quotes; typographic quotes are not
-- valid string delimiters.
select count(distinct remote_addr) from ods_weblog_detail;
select count(1) from dw_user_dsct_history where datestr = '20130918';
select count(1) from dw_user_new_d where datestr = '20130918';
以上是关于第2节 网站点击流项目(下):5访客分析的主要内容,如果未能解决你的问题,请参考以下文章