数据分析SQL日期维度表生成(含节假日)

Posted 小基基o_O

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数据分析SQL日期维度表生成(含节假日)相关的知识,希望对你有一定的参考价值。

文章目录

HIVE日期维度表

datediff

-- datediff(end, start): number of days from the second argument to the first.
SELECT datediff('2022-01-04', '2022-01-01');

输出:3

repeat

-- repeat(str, n): the string concatenated n times.
SELECT repeat('a', 3);

输出:aaa

split

-- Splitting a string made only of n delimiters yields n + 1 empty strings.
SELECT split('aaa', 'a');

输出:["","","",""]

posexplode

-- posexplode turns an array into rows of (pos, val), pos being the 0-based index.
SELECT posexplode(split('aaa', 'a'));

输出:共 4 行,pos 依次为 0、1、2、3,val 均为空字符串

date_add

-- date_add(start, n): the date n days after start.
SELECT date_add('2022-01-01', 1);

输出:
2022-01-02

生成日期序列

-- Date sequence: 3 delimiters produce 4 array elements, so pos runs 0..3
-- and the result covers the start date plus the next 3 days.
WITH seq AS (
    SELECT posexplode(split(repeat('a', 3), 'a')) AS (pos, val)
)
SELECT date_add('2022-01-01', seq.pos) AS ymd
FROM seq;

输出:
2022-01-01
2022-01-02
2022-01-03
2022-01-04

创建节日临时表(以2022为例,缺双十一、七夕……)

-- Holiday lookup for 2022: one row per date that deviates from the plain
-- weekday/weekend rule. Columns are (date, label, is_workday).
-- The select list is deliberately left unaliased: the resulting columns are
-- named _c0, _c1, _c2 and the queries that join this table rely on those names.
CREATE TABLE temp_holiday2022 AS
SELECT '2022-01-01', '元旦',   FALSE UNION ALL
SELECT '2022-01-02', '元旦',   FALSE UNION ALL
SELECT '2022-01-03', '元旦',   FALSE UNION ALL
SELECT '2022-01-29', '补班',   TRUE  UNION ALL
SELECT '2022-01-30', '补班',   TRUE  UNION ALL
SELECT '2022-01-31', '春节',   FALSE UNION ALL
SELECT '2022-02-01', '春节',   FALSE UNION ALL
SELECT '2022-02-02', '春节',   FALSE UNION ALL
SELECT '2022-02-03', '春节',   FALSE UNION ALL
SELECT '2022-02-04', '春节',   FALSE UNION ALL
SELECT '2022-02-05', '春节',   FALSE UNION ALL
SELECT '2022-02-06', '春节',   FALSE UNION ALL
SELECT '2022-04-02', '补班',   TRUE  UNION ALL
SELECT '2022-04-03', '清明节', FALSE UNION ALL
SELECT '2022-04-04', '清明节', FALSE UNION ALL
SELECT '2022-04-05', '清明节', FALSE UNION ALL
SELECT '2022-04-24', '补班',   TRUE  UNION ALL
SELECT '2022-04-30', '劳动节', FALSE UNION ALL
SELECT '2022-05-01', '劳动节', FALSE UNION ALL
SELECT '2022-05-02', '劳动节', FALSE UNION ALL
SELECT '2022-05-03', '劳动节', FALSE UNION ALL
SELECT '2022-05-04', '劳动节', FALSE UNION ALL
SELECT '2022-05-07', '补班',   TRUE  UNION ALL
SELECT '2022-06-03', '端午节', FALSE UNION ALL
SELECT '2022-06-04', '端午节', FALSE UNION ALL
SELECT '2022-06-05', '端午节', FALSE UNION ALL
SELECT '2022-09-11', '中秋节', FALSE UNION ALL
SELECT '2022-09-12', '中秋节', FALSE UNION ALL
SELECT '2022-09-10', '中秋节', FALSE UNION ALL
SELECT '2022-10-01', '国庆节', FALSE UNION ALL
SELECT '2022-10-02', '国庆节', FALSE UNION ALL
SELECT '2022-10-03', '国庆节', FALSE UNION ALL
SELECT '2022-10-04', '国庆节', FALSE UNION ALL
SELECT '2022-10-05', '国庆节', FALSE UNION ALL
SELECT '2022-10-06', '国庆节', FALSE UNION ALL
SELECT '2022-10-07', '国庆节', FALSE UNION ALL
SELECT '2022-10-08', '补班',   TRUE  UNION ALL
SELECT '2022-10-09', '补班',   TRUE;

-- Eyeball the loaded rows in date order.
SELECT `_c0`
      ,`_c1`
      ,`_c2`
FROM temp_holiday2022
ORDER BY `_c0`;

生成日期维度

-- One row per calendar day of 2022 with calendar attributes plus holiday / workday flags.
-- seq:  datediff gives 364 delimiters, split then yields 365 elements, so pos covers 0..364.
-- days: the actual dates, 2022-01-01 through 2022-12-31.
-- cal:  derived calendar attributes.
-- day_of_week is computed from the distance to a known Monday (2018-01-01), 1 = Monday .. 7 = Sunday.
-- The original used the u pattern letter, which means day-of-week only under SimpleDateFormat.
-- DateTimeFormatter-based engines interpret u as the year, which would break the weekend test below.
-- week_of_year is cast to TINYINT to agree with the dim_date column type.
WITH
seq AS (
  SELECT posexplode(split(repeat('a', datediff('2022-12-31', '2022-01-01')), 'a')) AS (pos, val)
),
days AS (
  SELECT date_add('2022-01-01', pos) AS d
  FROM seq
),
cal AS (
  SELECT date_format(d, 'yyyy-MM-dd') AS ymd
        ,year(d) AS y
        ,date_format(d, 'yyyy-MM') AS ym
        ,concat(year(d), 'q', quarter(d)) AS yq
        ,CAST(date_format(d, 'D') AS SMALLINT) AS day_of_year
        ,CAST(date_format(d, 'w') AS TINYINT) AS week_of_year
        ,CAST(pmod(datediff(d, '2018-01-01'), 7) + 1 AS TINYINT) AS day_of_week
        ,date_format(d, 'EEEE') AS day_of_week_en
  FROM days
)
SELECT cal.ymd
      ,cal.y
      ,cal.ym
      ,cal.yq
      ,cal.day_of_year
      ,cal.week_of_year
      ,cal.day_of_week
      ,cal.day_of_week_en
      -- A holiday-table entry wins, otherwise fall back to the plain weekday / weekend rule.
      ,COALESCE(h.`_c1`, IF(cal.day_of_week > 5, '休息日', '工作日')) AS holiday
      ,COALESCE(h.`_c2`, cal.day_of_week < 6) AS is_workday
FROM cal
LEFT JOIN temp_holiday2022 h
  ON cal.ymd = h.`_c0`;

日期维度表

-- Date dimension table (Hive): one row per calendar day.
-- IF NOT EXISTS keeps the script re-runnable when the table is already present.
-- The two weekday columns previously shared the same comment text and are now told apart.
CREATE TABLE IF NOT EXISTS dim_date (
  ymd            STRING   COMMENT '日期',
  y              INT      COMMENT '年',
  ym             STRING   COMMENT '年月',
  yq             STRING   COMMENT '季度',
  day_of_year    SMALLINT COMMENT '一年中的第几天',
  week_of_year   TINYINT  COMMENT '一年中的第几周,以周日为起点',
  day_of_week    TINYINT  COMMENT '星期(1=周一,7=周日)',
  day_of_week_en STRING   COMMENT '星期(英文全称)',
  holiday        STRING   COMMENT '节假日',
  is_workday     BOOLEAN  COMMENT 'true=工作日、false=休息日'
) COMMENT '日期维度表';

-- Load every day of 2022 into dim_date.
-- Columns are listed explicitly, in the table's column order, instead of relying on a star expansion.
-- day_of_week is computed from the distance to a known Monday (2018-01-01), 1 = Monday .. 7 = Sunday.
-- The original used the u pattern letter, which means day-of-week only under SimpleDateFormat.
-- DateTimeFormatter-based engines interpret u as the year, which would break the weekend test below.
-- week_of_year is cast to TINYINT to match the target column.
-- NOTE(review): INSERT INTO appends, so running this twice duplicates the year.
INSERT INTO TABLE dim_date
SELECT cal.ymd
      ,cal.y
      ,cal.ym
      ,cal.yq
      ,cal.day_of_year
      ,cal.week_of_year
      ,cal.day_of_week
      ,cal.day_of_week_en
      -- A holiday-table entry wins, otherwise fall back to the plain weekday / weekend rule.
      ,COALESCE(h.`_c1`, IF(cal.day_of_week > 5, '休息日', '工作日')) AS holiday
      ,COALESCE(h.`_c2`, cal.day_of_week < 6) AS is_workday
FROM (
  SELECT date_format(d, 'yyyy-MM-dd') AS ymd
        ,year(d) AS y
        ,date_format(d, 'yyyy-MM') AS ym
        ,concat(year(d), 'q', quarter(d)) AS yq
        ,CAST(date_format(d, 'D') AS SMALLINT) AS day_of_year
        ,CAST(date_format(d, 'w') AS TINYINT) AS week_of_year
        ,CAST(pmod(datediff(d, '2018-01-01'), 7) + 1 AS TINYINT) AS day_of_week
        ,date_format(d, 'EEEE') AS day_of_week_en
  FROM (
    SELECT date_add('2022-01-01', pos) AS d
    FROM (
      -- 364 delimiters yield 365 elements, so pos covers 0..364
      SELECT posexplode(split(repeat('a', datediff('2022-12-31', '2022-01-01')), 'a')) AS (pos, val)
    ) seq
  ) days
) cal
LEFT JOIN temp_holiday2022 h
  ON cal.ymd = h.`_c0`;

HIVE近30天日期序列

-- Last 30 days including today, oldest first: 29 delimiters give offsets 0..29.
WITH offsets AS (
    SELECT posexplode(split(repeat('a', 29), 'a')) AS (pos, val)
)
SELECT date_sub(current_date(), offsets.pos) AS ymd
FROM offsets
ORDER BY ymd;

-- Last 7 days including today, oldest first: 6 delimiters give offsets 0..6.
WITH offsets AS (
    SELECT posexplode(split(repeat('a', 6), 'a')) AS (pos, val)
)
SELECT date_sub(current_date(), offsets.pos) AS ymd
FROM offsets
ORDER BY ymd;

HIVE近48小时序列

-- Last 48 hours including the current one, oldest first.
-- 47 delimiters give offsets 0..47 and each offset steps back 3600 seconds from now.
WITH offsets AS (
    SELECT posexplode(split(repeat('a', 47), 'a')) AS (pos, val)
)
SELECT date_format(
           from_unixtime(unix_timestamp() - (offsets.pos * 3600)),
           'yyyy-MM-dd HH'
       ) AS h
FROM offsets
ORDER BY h;

mysql日期维度表

创建MySQL日期维度表,借助Python写入数据

MySQL建表

-- Date dimension table (MySQL): one row per calendar day, keyed by the date.
-- IF NOT EXISTS keeps the script re-runnable.
-- The charset is declared explicitly because holiday holds Chinese text and
-- the server default may not be a Unicode charset.
CREATE TABLE IF NOT EXISTS dim_date (
  ymd         DATE         COMMENT '日期',
  ym          CHAR(7)      COMMENT '年月',
  yq          CHAR(6)      COMMENT '季度',
  holiday     VARCHAR(9)   COMMENT '节假日',
  is_workday  BOOLEAN      COMMENT 'true=工作日、false=休息日',
  PRIMARY KEY (ymd)
) DEFAULT CHARSET = utf8mb4 COMMENT = '日期维度表';

Python3写入

import datetime, math, pymysql

START_DATE = '2022-01-01'
END_DATE = '2022-12-31'
# 2022 holiday calendar: 'YYYY-MM-DD' -> (label, is_workday).
# Only dates that deviate from the plain weekday/weekend rule are listed;
# entries flagged True are labelled '补班' and count as workdays.
HOLIDAY = {
    '2022-01-01': ('元旦', False), '2022-01-02': ('元旦', False),
    '2022-01-03': ('元旦', False), '2022-01-29': ('补班', True),
    '2022-01-30': ('补班', True), '2022-01-31': ('春节', False), '2022-02-01': ('春节', False),
    '2022-02-02': ('春节', False), '2022-02-03': ('春节', False), '2022-02-04': ('春节', False),
    '2022-02-05': ('春节', False), '2022-02-06': ('春节', False),
    '2022-04-02': ('补班', True), '2022-04-03': ('清明节', False),
    '2022-04-04': ('清明节', False), '2022-04-05': ('清明节', False),
    '2022-04-24': ('补班', True), '2022-04-30': ('劳动节', False),
    '2022-05-01': ('劳动节', False), '2022-05-02': ('劳动节', False),
    '2022-05-03': ('劳动节', False), '2022-05-04': ('劳动节', False), '2022-05-07': ('补班', True),
    '2022-06-03': ('端午节', False), '2022-06-04': ('端午节', False), '2022-06-05': ('端午节', False),
    '2022-09-11': ('中秋节', False), '2022-09-12': ('中秋节', False), '2022-09-10': ('中秋节', False),
    '2022-10-01': ('国庆节', False), '2022-10-02': ('国庆节', False),
    '2022-10-03': ('国庆节', False), '2022-10-04': ('国庆节', False),
    '2022-10-05': ('国庆节', False), '2022-10-06': ('国庆节', False),
    '2022-10-07': ('国庆节', False), '2022-10-08': ('补班', True), '2022-10-09': ('补班', True),
}
TABLE = 'dim_date'


def _connect():
    """Open a MySQL connection with the script's fixed connection settings."""
    return pymysql.connect(host='localhost', user='root', password='123456', charset='utf8', db='b0')


def insert(dt, db=None):
    """Insert one row into TABLE and commit.

    Args:
        dt: mapping of column name -> value. Entries whose value is None are
            left out of the statement.
        db: optional open pymysql connection to reuse. When omitted, a
            connection is opened for this call and closed afterwards.
    """
    ls = [(k, dt[k]) for k in dt if dt[k] is not None]
    # Column names cannot be bound as parameters; they come from the fixed
    # dict built by this script. Values are bound by the driver instead of
    # being formatted into the SQL text.
    sql = 'insert %s (%s) values (%s)' % (
        TABLE,
        ','.join(k for k, _ in ls),
        ','.join(['%s'] * len(ls)),
    )
    owns_connection = db is None
    if owns_connection:
        db = _connect()
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, [v for _, v in ls])
        finally:
            cursor.close()
        db.commit()
    finally:
        # Only close what this call opened; a caller-supplied connection stays open.
        if owns_connection:
            db.close()


start = datetime.datetime.strptime(START_DATE, '%Y-%m-%d')
end = datetime.datetime.strptime(END_DATE, '%Y-%m-%d')
# One connection for the whole run instead of one per day.
conn = _connect()
try:
    while end >= start:
        ymd = start.strftime('%Y-%m-%d')
        ym = start.strftime('%Y-%m')
        yq = '%d%s%d' % (start.year, 'q', math.ceil(start.month / 3))
        # weekday(): Monday is 0, so 5 and 6 are Saturday and Sunday.
        holiday, is_workday = HOLIDAY.get(ymd, (
            '休息日' if start.weekday() > 4 else '工作日',
            start.weekday() < 5))
        print(ymd, ym, yq, holiday, is_workday)
        insert({'ymd': ymd, 'ym': ym, 'yq': yq, 'holiday': holiday, 'is_workday': is_workday}, conn)
        start += datetime.timedelta(days=1)
finally:
    conn.close()

以上是关于数据分析SQL日期维度表生成(含节假日)的主要内容,如果未能解决你的问题,请参考以下文章

JAVA判断当前日期是节假日还是工作日

JAVA判断当前日期是节假日还是工作日

得到指定的偏移日期,遇到节假日就跳过,重新偏移定位日期

得到指定的偏移日期,遇到节假日就跳过,重新偏移定位日期

MySQL排除节假日,计算日期差

选择日期+3天,不包括周末和节假日