拉链表
Posted by yin-fei
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了拉链表相关的知识,希望对你有一定的参考价值。
1.数据准备
-- Demo source table: one full snapshot of member phone numbers per day,
-- partitioned by the snapshot date (statis_date, yyyymmdd string).
create table sospdm.tmp_ods_user (
     cust_num  string comment '会员编码'
    ,mbl_phone string comment '会员手机号'
)
partitioned by (statis_date string comment '统计时间')
stored as rcfile
;

-- The insert below supplies the partition value from the data itself,
-- so dynamic partitioning has to be enabled in non-strict mode.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

-- Load four daily snapshots (20170101 to 20170104). Across the days some
-- phone numbers change and members 005 and 006 appear for the first time.
insert overwrite table sospdm.tmp_ods_user partition (statis_date)
-- snapshot 20170101
          select '001' as cust_num, '111111' as mbl_phone, '20170101' as statis_date from sospdm.dual
union all select '002' as cust_num, '222222' as mbl_phone, '20170101' as statis_date from sospdm.dual
union all select '003' as cust_num, '333333' as mbl_phone, '20170101' as statis_date from sospdm.dual
union all select '004' as cust_num, '444444' as mbl_phone, '20170101' as statis_date from sospdm.dual
-- snapshot 20170102
union all select '001' as cust_num, '111111' as mbl_phone, '20170102' as statis_date from sospdm.dual
union all select '002' as cust_num, '233333' as mbl_phone, '20170102' as statis_date from sospdm.dual
union all select '003' as cust_num, '333333' as mbl_phone, '20170102' as statis_date from sospdm.dual
union all select '004' as cust_num, '432432' as mbl_phone, '20170102' as statis_date from sospdm.dual
union all select '005' as cust_num, '555555' as mbl_phone, '20170102' as statis_date from sospdm.dual
-- snapshot 20170103
union all select '001' as cust_num, '111111' as mbl_phone, '20170103' as statis_date from sospdm.dual
union all select '002' as cust_num, '233333' as mbl_phone, '20170103' as statis_date from sospdm.dual
union all select '003' as cust_num, '333333' as mbl_phone, '20170103' as statis_date from sospdm.dual
union all select '004' as cust_num, '654321' as mbl_phone, '20170103' as statis_date from sospdm.dual
union all select '005' as cust_num, '115115' as mbl_phone, '20170103' as statis_date from sospdm.dual
union all select '006' as cust_num, '666666' as mbl_phone, '20170103' as statis_date from sospdm.dual
-- snapshot 20170104
union all select '001' as cust_num, '111111' as mbl_phone, '20170104' as statis_date from sospdm.dual
union all select '002' as cust_num, '233333' as mbl_phone, '20170104' as statis_date from sospdm.dual
union all select '003' as cust_num, '333333' as mbl_phone, '20170104' as statis_date from sospdm.dual
union all select '004' as cust_num, '654321' as mbl_phone, '20170104' as statis_date from sospdm.dual
union all select '005' as cust_num, '115115' as mbl_phone, '20170104' as statis_date from sospdm.dual
union all select '006' as cust_num, '666767' as mbl_phone, '20170104' as statis_date from sospdm.dual
;
2.拉链逻辑
-- Full-snapshot zipper (history) load for sospdm.tmp_dwd_his.
-- Each row is one version of a member phone number. start_date is the load
-- date on which the version appeared, end_date is the load date on which it
-- was superseded, and end_date = 99991231 marks the version that is still open.
-- Every insert below uses a dynamic end_date partition, so the session needs
-- hive.exec.dynamic.partition=true and hive.exec.dynamic.partition.mode=nonstrict.

-- Step 1: create the history table, partitioned by end_date.
create table sospdm.tmp_dwd_his (
     cust_num   string comment '会员编码'
    ,mbl_phone  string comment '手机号'
    ,start_date string comment '开始时间'
)
partitioned by (end_date string comment '结束时间')
;

-- Step 2: seed the history from the first snapshot. Every row starts open.
insert overwrite table sospdm.tmp_dwd_his partition (end_date)
select
     cust_num
    ,mbl_phone
    ,'20170101' as start_date
    ,'99991231' as end_date
from sospdm.tmp_ods_user
where statis_date = '20170101'
;

-- Step 3: daily merge. Compare the open versions with the full snapshot of
-- ${statis_date} and rewrite the open partition plus the partition of versions
-- closed today. Only open versions (end_date = 99991231) are read from the
-- history, so rows already closed on ${statis_date} are not matched again and
-- a re-run of the same day does not duplicate open rows.
-- NOTE(review): the <> and = comparisons are not NULL-safe. A member whose
-- mbl_phone is NULL on either side matches no branch and drops out of the
-- open partition. Confirm whether NULL phones can occur.
-- NOTE(review): a member with an open version who is missing from the
-- snapshot of ${statis_date} is not written by any branch and therefore
-- disappears from the open partition. Confirm the intended handling.
insert overwrite table sospdm.tmp_dwd_his partition (end_date)
-- Branch A: brand-new members and members whose phone changed get a new
-- open version starting today.
select
     t1.cust_num
    ,t1.mbl_phone
    ,'${statis_date}' as start_date
    ,'99991231'       as end_date
from (
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_ods_user
    where statis_date = '${statis_date}'
) t1
left join (
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_dwd_his
    where start_date <= '${statis_date}'
      and end_date = '99991231'
) t2
    on t1.cust_num = t2.cust_num
where t2.cust_num is null
   or t1.mbl_phone <> t2.mbl_phone
union all
-- Branch B: members whose phone changed get their previous version closed
-- today. The closed row keeps the OLD phone number from the history (t2),
-- not the new value from the snapshot.
select
     t2.cust_num
    ,t2.mbl_phone
    ,t2.start_date
    ,'${statis_date}' as end_date
from (
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_ods_user
    where statis_date = '${statis_date}'
) t1
inner join (
    select
         cust_num
        ,mbl_phone
        ,start_date
        ,end_date
    from sospdm.tmp_dwd_his
    where start_date <= '${statis_date}'
      and end_date = '99991231'
) t2
    on t1.cust_num = t2.cust_num
where t1.mbl_phone <> t2.mbl_phone
union all
-- Branch C: members whose phone did not change keep their open version as is.
select
     t2.cust_num
    ,t2.mbl_phone
    ,t2.start_date
    ,t2.end_date
from (
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_ods_user
    where statis_date = '${statis_date}'
) t1
inner join (
    select
         cust_num
        ,mbl_phone
        ,start_date
        ,end_date
    from sospdm.tmp_dwd_his
    where start_date <= '${statis_date}'
      and end_date = '99991231'
) t2
    on  t1.cust_num  = t2.cust_num
    and t1.mbl_phone = t2.mbl_phone
;
3.拉链回滚
-- Rollback query: reconstruct the history as it stood before the load of
-- the rollback date (20170103 in this example) was applied.

-- Versions already closed before the rollback date stay closed.
select
     cust_num
    ,mbl_phone
    ,start_date
    ,end_date
from sospdm.tmp_dwd_his
where end_date < '20170103'
union all
-- Versions that existed before the rollback date and were still open at that
-- point are reopened. The start_date bound excludes versions created on or
-- after the rollback date, which would otherwise come back as extra open rows
-- for the same member.
select
     cust_num
    ,mbl_phone
    ,start_date
    ,'99991231' as end_date
from sospdm.tmp_dwd_his
where start_date < '20170103'
  and end_date >= '20170103'
;
以上是关于拉链表的主要内容,如果未能解决你的问题,请参考以下文章