用sql对含有时间段字段(起始时间结束时间)的记录做并集处理
Posted 九命猫幺的笔记
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了用sql对含有时间段字段(起始时间结束时间)的记录做并集处理相关的知识,希望对你有一定的参考价值。
来自于一个基友的问题:
他的博客同问题链接 sql时间段取并集、合并 https://blog.csdn.net/Seandba/article/details/105152412
计算通道的总开放时长,只要有任意一个终端开放通道就算开放,难点在于各种终端开放时间重叠包含
问题测试数据:
--问题一、测试数据--计算总开放时长(小时) TRUNCATE TABLE xcp; insert into xcp values(\'1\',\'A1\',to_date(\'20200317 01:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 06:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'2\',\'A1\',to_date(\'20200317 01:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 06:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'2\',\'A1\',to_date(\'20200317 01:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 08:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'2\',\'A1\',to_date(\'20200317 02:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 07:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'2\',\'A1\',to_date(\'20200317 03:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 07:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'2\',\'A1\',to_date(\'20200317 05:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 09:00:00\',\'yyyymmdd hh24:mi:ss \')); insert into xcp values(\'3\',\'A1\',to_date(\'20200317 09:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 11:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'3\',\'A1\',to_date(\'20200317 12:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 13:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'2\',\'A1\',to_date(\'20200317 14:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 19:00:00\',\'yyyymmdd hh24:mi:ss \')); insert into xcp values(\'3\',\'A1\',to_date(\'20200317 16:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 19:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'3\',\'A1\',to_date(\'20200317 18:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 19:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'3\',\'A1\',to_date(\'20200317 18:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 21:00:00\',\'yyyymmdd hh24:mi:ss\')); commit; SELECT * FROM xcp;
--问题1 WITH tmp1 AS ( --取所有时间节点 SELECT channel,BEGIN_TIME TIME FROM xcp UNION SELECT channel,end_time FROM xcp UNION SELECT channel,MIN(begin_time) FROM xcp GROUP BY channel UNION SELECT channel,MAX(end_time) FROM xcp GROUP BY channel), tmp2 AS(--每个时间节点连接到下个节点 形成时间段 SELECT a.channel,a.time,LEAD(a.time,1) OVER(PARTITION BY a.channel ORDER BY a.time) nexttime FROM tmp1 a), tmp3 AS(--每个时间段取中值 SELECT b.channel,b.TIME,b.nexttime,(b.nexttime-b.time)/2+b.time midtime FROM tmp2 b WHERE b.nexttime IS NOT NULL), tmp4 AS(--若中值处于原始记录中 则该段时间为通道开通时间 否则通道不开通 SELECT c.*, CASE WHEN EXISTS (SELECT 1 FROM xcp o WHERE c.midtime BETWEEN o.begin_time AND o.end_time) THEN 1 ELSE 0 END * (c.nexttime-c.time)*24 duration FROM tmp3 c) SELECT nvl(d.channel,\'合计时长\') 通道,d.TIME 开始时间,d.nexttime 结束时间, SUM(duration) "通道开通时间(小时)" FROM tmp4 d GROUP BY rollup((d.channel,d.TIME,d.nexttime)) ORDER BY 2;
看着就很垃圾的sql,执行计划一定垃圾,记录以备后查询吧
原理是吧时间节点拿出来,对没两个时间节点之间的时间段,取中间值到原始记录表查询,如果是,这段时间就是属于并集后的,然后对并集后的记录求和
问题2:求17日的的通道开放时长
--问题2、测试数据--计算27号开放时长(小时) TRUNCATE TABLE xcp; insert into xcp values(\'13\',\'A1\',to_date(\'20200314 08:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200315 09:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'14\',\'A1\',to_date(\'20200317 08:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 09:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'15\',\'A1\',to_date(\'20200316 03:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200317 05:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'16\',\'A1\',to_date(\'20200317 08:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200318 10:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'17\',\'A1\',to_date(\'20200316 08:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200318 10:00:00\',\'yyyymmdd hh24:mi:ss\')); insert into xcp values(\'18\',\'A1\',to_date(\'20200320 08:00:00\',\'yyyymmdd hh24:mi:ss\'),to_date(\'20200321 10:00:00\',\'yyyymmdd hh24:mi:ss\')); commit; SELECT * FROM xcp ORDER BY begin_time
----问题2 WITH tmp1 AS ( --取所有时间节点 取17号就加入17号0点和24点两个时间 SELECT channel,BEGIN_TIME TIME FROM xcp UNION SELECT channel,end_time FROM xcp UNION SELECT channel,MIN(begin_time) FROM xcp GROUP BY channel UNION SELECT channel,MAX(end_time) FROM xcp GROUP BY channel UNION SELECT DISTINCT channel,to_date(\'20200317\',\'yyyymmdd\') FROM xcp UNION SELECT DISTINCT channel,to_date(\'20200318\',\'yyyymmdd\') FROM xcp), tmp2 AS(--每个时间节点连接到下个节点 形成时间段 SELECT a.channel,a.time,LEAD(a.time,1) OVER(PARTITION BY a.channel ORDER BY a.time) nexttime FROM tmp1 a), tmp3 AS(--每个时间段取中值 SELECT b.channel,b.TIME,b.nexttime,(b.nexttime-b.time)/2+b.time midtime FROM tmp2 b WHERE b.nexttime IS NOT NULL AND to_char(b.TIME,\'yyyymmdd\')=20200317), tmp4 AS(--若中值处于原始记录中 则该段时间为通道开通时间 否则通道不开通 SELECT c.*, CASE WHEN EXISTS (SELECT 1 FROM xcp o WHERE c.midtime BETWEEN o.begin_time AND o.end_time) THEN 1 ELSE 0 END * (c.nexttime-c.time)*24 duration FROM tmp3 c) SELECT nvl(d.channel,\'合计时长\') 通道,d.TIME 开始时间,d.nexttime 结束时间, SUM(duration) "通道开通时间(小时)" FROM tmp4 d GROUP BY rollup((d.channel,d.TIME,d.nexttime)) ORDER BY 2;
思路是在第一步取时间节点的时候单独加入17日0点24点的时间点即可
优化:
上述代码全表扫描5次,效率垃圾,小强优化到一次扫描搞定了,代码
--第8的特征:下一条记录开始时间 大于 截止当前行的最大结束时间;那么就把这部分时间记下来,最后减掉即可 select (max(end_time) - min(begin_time)) * 24 - sum(decode(sign(next_begin_time - max_end_time), 1, (next_begin_time - max_end_time) * 24, 0)) 通道开通时间 from (select a.channel, a.begin_time, a.end_time, max(a.end_time) over(partition by a.channel order by a.begin_time rows between unbounded preceding and current row) max_end_time, --截止当前行的最大结束时间 lead(a.begin_time, 1) over(partition by a.channel order by a.begin_time) next_begin_time --下一条记录的开始时间 from xcp a) tmp;
我用plsql也优化了一个出来
/*思路: 第一步:两两合并,两条记录之间的关系只有两种:有交集 和 无交集 1)对于有交集的:两两合并,取MIN(begin_time),MAX(end_time)作为新记录, 2)对于无交集的:同样取MIN(begin_time),MAX(end_time)作为新记录,不过把中间空白部分计入duration_del 第二步:然后将第一步合并的新纪录和下一条记录再两两合并,以此类推,直至合并完所有记录 第三步:结果就是 最终合并记录的 end_time-begin_time-duration_del*/ DECLARE duration_del NUMBER:=0;--存储无交集的两两记录之间的空白时间 --用于存储合并后的时间 begin_time_merge DATE; end_time_merge DATE; --用于输入要查询的时间段 day1 DATE:=to_date(20200314,\'yyyymmdd\'); day2 DATE:=to_date(20200330,\'yyyymmdd\'); BEGIN FOR i IN (SELECT ROWNUM rnow,aa.* FROM (SELECT a.channel,GREATEST(a.begin_time, day1) begin_time,LEAST(a.end_time,day2) end_time FROM xcp a WHERE NOT (end_time < day1 OR begin_time> day2) ORDER BY 2)aa )LOOP --扫描一次全表 IF i.rnow=1 THEN --第一条记录用于初始化begin_time_merge end_time_merge begin_time_merge :=i.begin_time; end_time_merge:=i.end_time; ELSE IF i.begin_time>end_time_merge THEN duration_del:= duration_del+ (i.begin_time-end_time_merge)*24;--空白部分计入duration_del END IF; end_time_merge := GREATEST(end_time_merge,i.end_time); END IF; END LOOP; DBMS_OUTPUT.PUT_LINE((end_time_merge-begin_time_merge)*24-duration_del||\'个小时通道开放\'); END; /
就这样 以备后查
以上是关于用sql对含有时间段字段(起始时间结束时间)的记录做并集处理的主要内容,如果未能解决你的问题,请参考以下文章