数据分析SQL日期维度表生成(含节假日)
Posted 小基基o_O
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数据分析SQL日期维度表生成(含节假日)相关的知识,希望对你有一定的参考价值。
文章目录
HIVE日期维度表
datediff
-- datediff(end_date, start_date): number of days from the second argument to the first.
SELECT datediff('2022-01-04','2022-01-01');
输出:3
repeat
-- repeat(str, n): the string concatenated n times.
SELECT repeat('a',3);
输出:aaa
split
-- split(str, pattern): a string made of N delimiters yields N+1 pieces, empty strings included.
SELECT split('aaa','a');
输出:
["","","",""]
posexplode
-- posexplode emits one row per array element together with its zero-based position.
SELECT posexplode(split('aaa','a'));
输出:两列 pos、val,共 4 行;pos 依次为 0、1、2、3,val 均为空字符串
date_add
-- date_add(start_date, n): the date n days after start_date.
-- The terminating semicolon keeps this statement from running into whatever follows it in a script.
SELECT date_add('2022-01-01',1);
输出:
2022-01-02
生成日期序列
-- Four consecutive dates starting at 2022-01-01.
-- repeat() builds three delimiters, split() turns them into four pieces,
-- and posexplode() numbers the pieces 0..3 so each number becomes a day offset.
WITH offsets AS (
    SELECT posexplode(split(repeat('a', 3), 'a'))
)
SELECT date_add('2022-01-01', pos) AS ymd
FROM offsets;
输出:
2022-01-01
2022-01-02
2022-01-03
2022-01-04
创建节日临时表(以2022为例,缺双十一、七夕……)
-- Holiday calendar for 2022: one row per public-holiday day or make-up workday.
-- The columns carry no aliases, so later queries read them under the generated
-- names `_c0` (date string), `_c1` (label) and `_c2` (true when the day is a workday).
CREATE TABLE temp_holiday2022 AS
-- New Year holiday
SELECT '2022-01-01', '元旦', false
UNION ALL SELECT '2022-01-02', '元旦', false
UNION ALL SELECT '2022-01-03', '元旦', false
-- Spring Festival and its make-up workdays
UNION ALL SELECT '2022-01-29', '补班', true
UNION ALL SELECT '2022-01-30', '补班', true
UNION ALL SELECT '2022-01-31', '春节', false
UNION ALL SELECT '2022-02-01', '春节', false
UNION ALL SELECT '2022-02-02', '春节', false
UNION ALL SELECT '2022-02-03', '春节', false
UNION ALL SELECT '2022-02-04', '春节', false
UNION ALL SELECT '2022-02-05', '春节', false
UNION ALL SELECT '2022-02-06', '春节', false
-- Qingming Festival and its make-up workday
UNION ALL SELECT '2022-04-02', '补班', true
UNION ALL SELECT '2022-04-03', '清明节', false
UNION ALL SELECT '2022-04-04', '清明节', false
UNION ALL SELECT '2022-04-05', '清明节', false
-- Labour Day and its make-up workdays
UNION ALL SELECT '2022-04-24', '补班', true
UNION ALL SELECT '2022-04-30', '劳动节', false
UNION ALL SELECT '2022-05-01', '劳动节', false
UNION ALL SELECT '2022-05-02', '劳动节', false
UNION ALL SELECT '2022-05-03', '劳动节', false
UNION ALL SELECT '2022-05-04', '劳动节', false
UNION ALL SELECT '2022-05-07', '补班', true
-- Dragon Boat Festival
UNION ALL SELECT '2022-06-03', '端午节', false
UNION ALL SELECT '2022-06-04', '端午节', false
UNION ALL SELECT '2022-06-05', '端午节', false
-- Mid-Autumn Festival
UNION ALL SELECT '2022-09-11', '中秋节', false
UNION ALL SELECT '2022-09-12', '中秋节', false
UNION ALL SELECT '2022-09-10', '中秋节', false
-- National Day and its make-up workdays
UNION ALL SELECT '2022-10-01', '国庆节', false
UNION ALL SELECT '2022-10-02', '国庆节', false
UNION ALL SELECT '2022-10-03', '国庆节', false
UNION ALL SELECT '2022-10-04', '国庆节', false
UNION ALL SELECT '2022-10-05', '国庆节', false
UNION ALL SELECT '2022-10-06', '国庆节', false
UNION ALL SELECT '2022-10-07', '国庆节', false
UNION ALL SELECT '2022-10-08', '补班', true
UNION ALL SELECT '2022-10-09', '补班', true;
-- Show the holiday rows in date order.
SELECT
    `_c0`,
    `_c1`,
    `_c2`
FROM temp_holiday2022
ORDER BY `_c0`;
生成日期维度
-- One row per calendar day of 2022 with derived date attributes and holiday status.
WITH
-- Positions 0..364: datediff gives 364 delimiters and split yields one more piece than that.
offsets AS (
    SELECT posexplode(split(repeat('a', datediff('2022-12-31', '2022-01-01')), 'a'))
),
-- Each position becomes a day offset from January 1st.
days AS (
    SELECT date_add('2022-01-01', pos) AS d
    FROM offsets
),
-- Attributes derived from the date alone.
t2 AS (
    SELECT
        date_format(d, 'yyyy-MM-dd') AS ymd,
        year(d) AS y,
        date_format(d, 'yyyy-MM') AS ym,
        concat(year(d), 'q', quarter(d)) AS yq,
        CAST(date_format(d, 'D') AS SMALLINT) AS day_of_year,
        CAST(date_format(d, 'w') AS SMALLINT) AS week_of_year,
        CAST(date_format(d, 'u') AS TINYINT) AS day_of_week,
        date_format(d, 'EEEE') AS day_of_week_en
    FROM days
)
-- Days listed in temp_holiday2022 take its label and workday flag.
-- Every other day falls back to the weekday rule, where day_of_week above 5 is a rest day.
SELECT
    t2.*,
    NVL(`_c1`, IF(day_of_week > 5, '休息日', '工作日')) AS holiday,
    NVL(`_c2`, day_of_week < 6) AS is_workday
FROM t2
LEFT JOIN temp_holiday2022
    ON ymd = `_c0`;
日期维度表
-- Hive date dimension, one row per calendar day.
-- IF NOT EXISTS lets the script be re-run without failing on an existing table.
-- The two weekday columns now carry distinct comments: one is the number, the other the English name.
-- NOTE(review): the week_of_year comment says weeks start on Sunday, which depends on the
-- locale used by date_format when the table is loaded - confirm on the target cluster.
CREATE TABLE IF NOT EXISTS dim_date (
    ymd            STRING   COMMENT '日期',
    y              INT      COMMENT '年',
    ym             STRING   COMMENT '年月',
    yq             STRING   COMMENT '季度',
    day_of_year    SMALLINT COMMENT '一年中的第几天',
    week_of_year   TINYINT  COMMENT '一年中的第几周,以周日为起点',
    day_of_week    TINYINT  COMMENT '星期(数字,1=周一,7=周日)',
    day_of_week_en STRING   COMMENT '星期(英文全称)',
    holiday        STRING   COMMENT '节假日',
    is_workday     BOOLEAN  COMMENT 'true=工作日、false=休息日'
) COMMENT '日期维度表';
-- Load every day of 2022 into dim_date.
-- Hive matches INSERT ... SELECT columns by position, so the select list is written out
-- in the same order as the dim_date columns instead of relying on t2.*.
-- week_of_year is cast to TINYINT to agree with the type declared on dim_date.
-- Days listed in temp_holiday2022 take its label and workday flag, and every other day
-- falls back to the weekday rule, where day_of_week above 5 is a rest day.
INSERT INTO TABLE dim_date
SELECT
    t2.ymd,
    t2.y,
    t2.ym,
    t2.yq,
    t2.day_of_year,
    t2.week_of_year,
    t2.day_of_week,
    t2.day_of_week_en,
    NVL(h.`_c1`, IF(t2.day_of_week > 5, '休息日', '工作日')) AS holiday,
    NVL(h.`_c2`, t2.day_of_week < 6) AS is_workday
FROM (
    SELECT
        date_format(d, 'yyyy-MM-dd') AS ymd,
        year(d) AS y,
        date_format(d, 'yyyy-MM') AS ym,
        concat(year(d), 'q', quarter(d)) AS yq,
        CAST(date_format(d, 'D') AS SMALLINT) AS day_of_year,
        CAST(date_format(d, 'w') AS TINYINT) AS week_of_year,
        CAST(date_format(d, 'u') AS TINYINT) AS day_of_week,
        date_format(d, 'EEEE') AS day_of_week_en
    FROM (
        SELECT date_add('2022-01-01', pos) AS d
        FROM (
            SELECT posexplode(split(repeat('a', datediff('2022-12-31', '2022-01-01')), 'a'))
        ) t0
    ) t1
) t2
LEFT JOIN temp_holiday2022 h
    ON t2.ymd = h.`_c0`;
HIVE近30天日期序列
-- Last 30 days, today included, oldest first.
-- 29 delimiters split into 30 pieces, giving offsets 0..29 back from today.
WITH offsets AS (
    SELECT posexplode(split(repeat('a', 29), 'a'))
)
SELECT date_sub(current_date(), pos) AS ymd
FROM offsets
ORDER BY ymd;
-- Last 7 days, today included, oldest first.
WITH offsets AS (
    SELECT posexplode(split(repeat('a', 6), 'a'))
)
SELECT date_sub(current_date(), pos) AS ymd
FROM offsets
ORDER BY ymd;
HIVE近48小时序列
-- Last 48 hours, the current hour included, formatted as yyyy-MM-dd HH and sorted oldest first.
-- Each offset 0..47 is subtracted from the current Unix time as a whole number of hours.
WITH offsets AS (
    SELECT posexplode(split(repeat('a', 47), 'a'))
)
SELECT
    date_format(
        from_unixtime(unix_timestamp() - (pos * 3600)),
        'yyyy-MM-dd HH'
    ) AS h
FROM offsets
ORDER BY h;
mysql日期维度表
创建MySQL日期维度表,借助Python写入数据
MySQL建表
-- MySQL date dimension keyed by calendar date.
-- The rows are written by the Python script that accompanies this definition.
CREATE TABLE dim_date (
    ymd        DATE       COMMENT '日期',
    ym         CHAR(7)    COMMENT '年月',
    yq         CHAR(6)    COMMENT '季度',
    holiday    VARCHAR(9) COMMENT '节假日',
    is_workday BOOLEAN    COMMENT 'true=工作日、false=休息日',
    PRIMARY KEY (ymd)
) COMMENT '日期维度表';
Python3写入
import datetime, math, pymysql
# First and last day to generate (both inclusive), as yyyy-mm-dd strings.
START_DATE = '2022-01-01'
END_DATE = '2022-12-31'

# Calendar overrides for 2022: date string -> (label, is_workday).
# Public-holiday days map to False, make-up workdays ('补班') map to True.
# Dates not listed here are classified by weekday in the driver loop.
# The dict braces are restored; without them the assignment is a syntax error.
HOLIDAY = {
    '2022-01-01': ('元旦', False), '2022-01-02': ('元旦', False),
    '2022-01-03': ('元旦', False), '2022-01-29': ('补班', True),
    '2022-01-30': ('补班', True), '2022-01-31': ('春节', False), '2022-02-01': ('春节', False),
    '2022-02-02': ('春节', False), '2022-02-03': ('春节', False), '2022-02-04': ('春节', False),
    '2022-02-05': ('春节', False), '2022-02-06': ('春节', False),
    '2022-04-02': ('补班', True), '2022-04-03': ('清明节', False),
    '2022-04-04': ('清明节', False), '2022-04-05': ('清明节', False),
    '2022-04-24': ('补班', True), '2022-04-30': ('劳动节', False),
    '2022-05-01': ('劳动节', False), '2022-05-02': ('劳动节', False),
    '2022-05-03': ('劳动节', False), '2022-05-04': ('劳动节', False), '2022-05-07': ('补班', True),
    '2022-06-03': ('端午节', False), '2022-06-04': ('端午节', False), '2022-06-05': ('端午节', False),
    '2022-09-11': ('中秋节', False), '2022-09-12': ('中秋节', False), '2022-09-10': ('中秋节', False),
    '2022-10-01': ('国庆节', False), '2022-10-02': ('国庆节', False),
    '2022-10-03': ('国庆节', False), '2022-10-04': ('国庆节', False),
    '2022-10-05': ('国庆节', False), '2022-10-06': ('国庆节', False),
    '2022-10-07': ('国庆节', False), '2022-10-08': ('补班', True), '2022-10-09': ('补班', True),
}

# Name of the table the rows are inserted into.
TABLE = 'dim_date'
def insert(dt):
    """Insert one row into TABLE.

    dt maps column names to values. Entries whose value is None are left out
    of the statement, so those columns receive their table default.

    Values are passed to the driver as bound parameters instead of being
    formatted into the SQL text. Column names cannot be bound, so the keys of
    dt must be trusted identifiers supplied by this script.
    The connection is closed even when the statement fails.
    """
    columns = [name for name in dt if dt[name] is not None]
    sql = 'INSERT INTO %s (%s) VALUES (%s)' % (
        TABLE,
        ','.join(columns),
        ','.join(['%s'] * len(columns)),  # one driver placeholder per value
    )
    db = pymysql.connect(host='localhost', user='root', password='123456', charset='utf8', db='b0')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, [dt[name] for name in columns])
        finally:
            cursor.close()
        db.commit()
    finally:
        db.close()
# Walk the inclusive range START_DATE..END_DATE one day at a time and write one row per day.
start = datetime.datetime.strptime(START_DATE, '%Y-%m-%d')
end = datetime.datetime.strptime(END_DATE, '%Y-%m-%d')
while end >= start:
    ymd = start.strftime('%Y-%m-%d')
    ym = start.strftime('%Y-%m')
    # Quarter 1..4 by integer arithmetic: months 1-3 -> 1, 4-6 -> 2, and so on.
    yq = '%d%s%d' % (start.year, 'q', (start.month + 2) // 3)
    # Dates listed in HOLIDAY win; otherwise Saturday and Sunday
    # (weekday() 5 and 6) are rest days and the other days are workdays.
    holiday, is_workday = HOLIDAY.get(ymd, (
        '休息日' if start.weekday() > 4 else '工作日',
        start.weekday() < 5))
    print(ymd, ym, yq, holiday, is_workday)
    # The row is passed as a dict; the braces are restored here because the
    # call is a syntax error without them.
    insert({'ymd': ymd, 'ym': ym, 'yq': yq, 'holiday': holiday, 'is_workday': is_workday})
    start += datetime.timedelta(days=1)
以上是关于数据分析SQL日期维度表生成(含节假日)的主要内容,如果未能解决你的问题,请参考以下文章