数仓搭建DWT层
Posted 今夜月色很美
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数仓搭建DWT层相关的知识,希望对你有一定的参考价值。
1、DWT层作用
以DWS层为基础,对数据进行累积汇总。一行信息代表一个主题对象的累积行为,例如一个用户从注册那天开始至今一共下了多少次单。
2、访客主题
1)建表语句
DROP TABLE IF EXISTS dwt_visitor_topic;
CREATE EXTERNAL TABLE dwt_visitor_topic
(
`mid_id` STRING COMMENT '设备id',
`brand` STRING COMMENT '手机品牌',
`model` STRING COMMENT '手机型号',
`channel` ARRAY<STRING> COMMENT '渠道',
`os` ARRAY<STRING> COMMENT '操作系统',
`area_code` ARRAY<STRING> COMMENT '地区ID',
`version_code` ARRAY<STRING> COMMENT '应用版本',
`visit_date_first` STRING COMMENT '首次访问时间',
`visit_date_last` STRING COMMENT '末次访问时间',
`visit_last_1d_count` BIGINT COMMENT '最近1日访问次数',
`visit_last_1d_day_count` BIGINT COMMENT '最近1日访问天数',
`visit_last_7d_count` BIGINT COMMENT '最近7日访问次数',
`visit_last_7d_day_count` BIGINT COMMENT '最近7日访问天数',
`visit_last_30d_count` BIGINT COMMENT '最近30日访问次数',
`visit_last_30d_day_count` BIGINT COMMENT '最近30日访问天数',
`visit_count` BIGINT COMMENT '累积访问次数',
`visit_day_count` BIGINT COMMENT '累积访问天数'
) COMMENT '设备主题宽表'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwt/dwt_visitor_topic'
TBLPROPERTIES ("parquet.compression"="lzo");
2)数据装载
insert overwrite table dwt_visitor_topic partition(dt='2022-04-11')
select
nvl(1d_ago.mid_id,old.mid_id),
nvl(1d_ago.brand,old.brand),
nvl(1d_ago.model,old.model),
nvl(1d_ago.channel,old.channel),
nvl(1d_ago.os,old.os),
nvl(1d_ago.area_code,old.area_code),
nvl(1d_ago.version_code,old.version_code),
case when old.mid_id is null and 1d_ago.is_new=1 then '2022-04-11'
when old.mid_id is null and 1d_ago.is_new=0 then '2020-06-13'--无法获取准确的首次登录日期,给定一个数仓搭建日之前的日期
else old.visit_date_first end,
if(1d_ago.mid_id is not null,'2022-04-11',old.visit_date_last),
nvl(1d_ago.visit_count,0),
if(1d_ago.mid_id is null,0,1),
nvl(old.visit_last_7d_count,0)+nvl(1d_ago.visit_count,0)- nvl(7d_ago.visit_count,0),
nvl(old.visit_last_7d_day_count,0)+if(1d_ago.mid_id is null,0,1)- if(7d_ago.mid_id is null,0,1),
nvl(old.visit_last_30d_count,0)+nvl(1d_ago.visit_count,0)- nvl(30d_ago.visit_count,0),
nvl(old.visit_last_30d_day_count,0)+if(1d_ago.mid_id is null,0,1)- if(30d_ago.mid_id is null,0,1),
nvl(old.visit_count,0)+nvl(1d_ago.visit_count,0),
nvl(old.visit_day_count,0)+if(1d_ago.mid_id is null,0,1)
from
(
select
mid_id,
brand,
model,
channel,
os,
area_code,
version_code,
visit_date_first,
visit_date_last,
visit_last_1d_count,
visit_last_1d_day_count,
visit_last_7d_count,
visit_last_7d_day_count,
visit_last_30d_count,
visit_last_30d_day_count,
visit_count,
visit_day_count
from dwt_visitor_topic
where dt=date_add('2022-04-11',-1)
)old
full outer join
(
select
mid_id,
brand,
model,
is_new,
channel,
os,
area_code,
version_code,
visit_count
from dws_visitor_action_daycount
where dt='2022-04-11'
)1d_ago
on old.mid_id=1d_ago.mid_id
left join
(
select
mid_id,
brand,
model,
is_new,
channel,
os,
area_code,
version_code,
visit_count
from dws_visitor_action_daycount
where dt=date_add('2022-04-11',-7)
)7d_ago
on old.mid_id=7d_ago.mid_id
left join
(
select
mid_id,
brand,
model,
is_new,
channel,
os,
area_code,
version_code,
visit_count
from dws_visitor_action_daycount
where dt=date_add('2022-04-11',-30)
)30d_ago
on old.mid_id=30d_ago.mid_id;
3、用户主题
1)建表语句
DROP TABLE IF EXISTS dwt_user_topic;
CREATE EXTERNAL TABLE dwt_user_topic
(
`user_id` STRING COMMENT '用户id',
`login_date_first` STRING COMMENT '首次活跃日期',
`login_date_last` STRING COMMENT '末次活跃日期',
`login_date_1d_count` STRING COMMENT '最近1日登录次数',
`login_last_1d_day_count` BIGINT COMMENT '最近1日登录天数',
`login_last_7d_count` BIGINT COMMENT '最近7日登录次数',
`login_last_7d_day_count` BIGINT COMMENT '最近7日登录天数',
`login_last_30d_count` BIGINT COMMENT '最近30日登录次数',
`login_last_30d_day_count` BIGINT COMMENT '最近30日登录天数',
`login_count` BIGINT COMMENT '累积登录次数',
`login_day_count` BIGINT COMMENT '累积登录天数',
`order_date_first` STRING COMMENT '首次下单时间',
`order_date_last` STRING COMMENT '末次下单时间',
`order_last_1d_count` BIGINT COMMENT '最近1日下单次数',
`order_activity_last_1d_count` BIGINT COMMENT '最近1日订单参与活动次数',
`order_activity_reduce_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日订单减免金额(活动)',
`order_coupon_last_1d_count` BIGINT COMMENT '最近1日下单用券次数',
`order_coupon_reduce_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日订单减免金额(优惠券)',
`order_last_1d_original_amount` DECIMAL(16,2) COMMENT '最近1日原始下单金额',
`order_last_1d_final_amount` DECIMAL(16,2) COMMENT '最近1日最终下单金额',
`order_last_7d_count` BIGINT COMMENT '最近7日下单次数',
`order_activity_last_7d_count` BIGINT COMMENT '最近7日订单参与活动次数',
`order_activity_reduce_last_7d_amount` DECIMAL(16,2) COMMENT '最近7日订单减免金额(活动)',
`order_coupon_last_7d_count` BIGINT COMMENT '最近7日下单用券次数',
`order_coupon_reduce_last_7d_amount` DECIMAL(16,2) COMMENT '最近7日订单减免金额(优惠券)',
`order_last_7d_original_amount` DECIMAL(16,2) COMMENT '最近7日原始下单金额',
`order_last_7d_final_amount` DECIMAL(16,2) COMMENT '最近7日最终下单金额',
`order_last_30d_count` BIGINT COMMENT '最近30日下单次数',
`order_activity_last_30d_count` BIGINT COMMENT '最近30日订单参与活动次数',
`order_activity_reduce_last_30d_amount` DECIMAL(16,2) COMMENT '最近30日订单减免金额(活动)',
`order_coupon_last_30d_count` BIGINT COMMENT '最近30日下单用券次数',
`order_coupon_reduce_last_30d_amount` DECIMAL(16,2) COMMENT '最近30日订单减免金额(优惠券)',
`order_last_30d_original_amount` DECIMAL(16,2) COMMENT '最近30日原始下单金额',
`order_last_30d_final_amount` DECIMAL(16,2) COMMENT '最近30日最终下单金额',
`order_count` BIGINT COMMENT '累积下单次数',
`order_activity_count` BIGINT COMMENT '累积订单参与活动次数',
`order_activity_reduce_amount` DECIMAL(16,2) COMMENT '累积订单减免金额(活动)',
`order_coupon_count` BIGINT COMMENT '累积下单用券次数',
`order_coupon_reduce_amount` DECIMAL(16,2) COMMENT '累积订单减免金额(优惠券)',
`order_original_amount` DECIMAL(16,2) COMMENT '累积原始下单金额',
`order_final_amount` DECIMAL(16,2) COMMENT '累积最终下单金额',
`payment_date_first` STRING COMMENT '首次支付时间',
`payment_date_last` STRING COMMENT '末次支付时间',
`payment_last_1d_count` BIGINT COMMENT '最近1日支付次数',
`payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日支付金额',
`payment_last_7d_count` BIGINT COMMENT '最近7日支付次数',
`payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7日支付金额',
`payment_last_30d_count` BIGINT COMMENT '最近30日支付次数',
`payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30日支付金额',
`payment_count` BIGINT COMMENT '累积支付次数',
`payment_amount` DECIMAL(16,2) COMMENT '累积支付金额',
`refund_order_last_1d_count` BIGINT COMMENT '最近1日退单次数',
`refund_order_last_1d_num` BIGINT COMMENT '最近1日退单件数',
`refund_order_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日退单金额',
`refund_order_last_7d_count` BIGINT COMMENT '最近7日退单次数',
`refund_order_last_7d_num` BIGINT COMMENT '最近7日退单件数',
`refund_order_last_7d_amount` DECIMAL(16,2) COMMENT '最近7日退单金额',
`refund_order_last_30d_count` BIGINT COMMENT '最近30日退单次数',
`refund_order_last_30d_num` BIGINT COMMENT '最近30日退单件数',
`refund_order_last_30d_amount` DECIMAL(16,2) COMMENT '最近30日退单金额',
`refund_order_count` BIGINT COMMENT '累积退单次数',
`refund_order_num` BIGINT COMMENT '累积退单件数',
`refund_order_amount` DECIMAL(16,2) COMMENT '累积退单金额',
`refund_payment_last_1d_count` BIGINT COMMENT '最近1日退款次数',
`refund_payment_last_1d_num` BIGINT COMMENT '最近1日退款件数',
`refund_payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日退款金额',
`refund_payment_last_7d_count` BIGINT COMMENT '最近7日退款次数',
`refund_payment_last_7d_num` BIGINT COMMENT '最近7日退款件数',
`refund_payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7日退款金额',
`refund_payment_last_30d_count` BIGINT COMMENT '最近30日退款次数',
`refund_payment_last_30d_num` BIGINT COMMENT '最近30日退款件数',
`refund_payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30日退款金额',
`refund_payment_count` BIGINT COMMENT '累积退款次数',
`refund_payment_num` BIGINT COMMENT '累积退款件数',
`refund_payment_amount` DECIMAL(16,2) COMMENT '累积退款金额',
`cart_last_1d_count` BIGINT COMMENT '最近1日加入购物车次数',
`cart_last_7d_count` BIGINT COMMENT '最近7日加入购物车次数',
`cart_last_30d_count` BIGINT COMMENT '最近30日加入购物车次数',
`cart_count` BIGINT COMMENT '累积加入购物车次数',
`favor_last_1d_count` BIGINT COMMENT '最近1日收藏次数',
`favor_last_7d_count` BIGINT COMMENT '最近7日收藏次数',
`favor_last_30d_count` BIGINT COMMENT '最近30日收藏次数',
`favor_count` BIGINT COMMENT '累积收藏次数',
`coupon_last_1d_get_count` BIGINT COMMENT '最近1日领券次数',
`coupon_last_1d_using_count` BIGINT COMMENT '最近1日用券(下单)次数',
`coupon_last_1d_used_count` BIGINT COMMENT '最近1日用券(支付)次数',
`coupon_last_7d_get_count` BIGINT COMMENT '最近7日领券次数',
`coupon_last_7d_using_count` BIGINT COMMENT '最近7日用券(下单)次数',
`coupon_last_7d_used_count` BIGINT COMMENT '最近7日用券(支付)次数',
`coupon_last_30d_get_count` BIGINT COMMENT '最近30日领券次数',
`coupon_last_30d_using_count` BIGINT COMMENT '最近30日用券(下单)次数',
`coupon_last_30d_used_count` BIGINT COMMENT '最近30日用券(支付)次数',
`coupon_get_count` BIGINT COMMENT '累积领券次数',
`coupon_using_count` BIGINT COMMENT '累积用券(下单)次数',
`coupon_used_count` BIGINT COMMENT '累积用券(支付)次数',
`appraise_last_1d_good_count` BIGINT COMMENT '最近1日好评次数',
`appraise_last_1d_mid_count` BIGINT COMMENT '最近1日中评次数',
`appraise_last_1d_bad_count` BIGINT COMMENT '最近1日差评次数',
`appraise_last_1d_default_count` BIGINT COMMENT '最近1日默认评价次数',
`appraise_last_7d_good_count` BIGINT COMMENT '最近7日好评次数',
`appraise_last_7d_mid_count` BIGINT COMMENT '最近7日中评次数',
`appraise_last_7d_bad_count` BIGINT COMMENT '最近7日差评次数',
`appraise_last_7d_default_count` BIGINT COMMENT '最近7日默认评价次数',
`appraise_last_30d_good_count` BIGINT COMMENT '最近30日好评次数',
`appraise_last_30d_mid_count` BIGINT COMMENT '最近30日中评次数',
`以上是关于数仓搭建DWT层的主要内容,如果未能解决你的问题,请参考以下文章