用sql對含有時間段字段(起始時間、結束時間)的記錄做並集處理


來自於一個基友的問題:

他的博客同問題鏈接    sql時間段取並集、合並 https://blog.csdn.net/Seandba/article/details/105152412 

計算通道的總開放時長,只要有任意一個終端開放通道就算開放,難點在於各種終端開放時間重疊包含

aa128bd9b772d921814e068c67d5e7f4

問題測試數據:

--問題一、測試數據--計算總開放時長(小時)
TRUNCATE TABLE xcp;
insert into xcp values('1','A1',to_date('20200317 01:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 06:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('2','A1',to_date('20200317 01:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 06:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('2','A1',to_date('20200317 01:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 08:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('2','A1',to_date('20200317 02:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 07:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('2','A1',to_date('20200317 03:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 07:00:00','yyyymmdd hh24:mi:ss'));

insert into xcp values('2','A1',to_date('20200317 05:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 09:00:00','yyyymmdd hh24:mi:ss '));
insert into xcp values('3','A1',to_date('20200317 09:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 11:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('3','A1',to_date('20200317 12:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 13:00:00','yyyymmdd hh24:mi:ss'));

insert into xcp values('2','A1',to_date('20200317 14:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 19:00:00','yyyymmdd hh24:mi:ss '));
insert into xcp values('3','A1',to_date('20200317 16:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 19:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('3','A1',to_date('20200317 18:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 19:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('3','A1',to_date('20200317 18:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 21:00:00','yyyymmdd hh24:mi:ss'));
commit;

SELECT * FROM xcp;

image2問題核心是求多條記錄之間的並集操作 ,我寫的sql如下,

--問題1
WITH tmp1 AS (  --取所有時間節點
SELECT channel,BEGIN_TIME TIME FROM xcp
UNION SELECT channel,end_time FROM xcp
UNION SELECT channel,MIN(begin_time) FROM xcp GROUP BY channel
UNION SELECT channel,MAX(end_time) FROM xcp GROUP BY channel),

tmp2 AS(--每個時間節點連接到下個節點  形成時間段
SELECT a.channel,a.time,LEAD(a.time,1) OVER(PARTITION BY a.channel ORDER BY a.time) nexttime
FROM tmp1 a),

tmp3 AS(--每個時間段取中值
SELECT b.channel,b.TIME,b.nexttime,(b.nexttime-b.time)/2+b.time midtime
FROM tmp2 b
WHERE b.nexttime IS NOT NULL),

tmp4 AS(--若中值處於原始記錄中  則該段時間為通道開通時間 否則通道不開通
SELECT c.*,
CASE WHEN EXISTS (SELECT 1 FROM xcp o WHERE c.midtime BETWEEN o.begin_time AND o.end_time) THEN 1 ELSE 0 END *
(c.nexttime-c.time)*24 duration
FROM tmp3 c)

SELECT nvl(d.channel,'合計時長') 通道,d.TIME 開始時間,d.nexttime 結束時間,
SUM(duration) "通道開通時間(小時)" FROM tmp4 d
GROUP BY rollup((d.channel,d.TIME,d.nexttime))
ORDER BY 2;

09e02bd2c2d7e12249c24dc5380b754看着就很垃圾的sql,執行計划一定垃圾,記錄以備后查詢吧

原理是吧時間節點拿出來,對沒兩個時間節點之間的時間段,取中間值到原始記錄表查詢,如果是,這段時間就是屬於並集后的,然后對並集后的記錄求和


問題2:求17日的的通道開放時長

--問題2、測試數據--計算27號開放時長(小時)
TRUNCATE TABLE xcp;
insert into xcp values('13','A1',to_date('20200314 08:00:00','yyyymmdd hh24:mi:ss'),to_date('20200315 09:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('14','A1',to_date('20200317 08:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 09:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('15','A1',to_date('20200316 03:00:00','yyyymmdd hh24:mi:ss'),to_date('20200317 05:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('16','A1',to_date('20200317 08:00:00','yyyymmdd hh24:mi:ss'),to_date('20200318 10:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('17','A1',to_date('20200316 08:00:00','yyyymmdd hh24:mi:ss'),to_date('20200318 10:00:00','yyyymmdd hh24:mi:ss'));
insert into xcp values('18','A1',to_date('20200320 08:00:00','yyyymmdd hh24:mi:ss'),to_date('20200321 10:00:00','yyyymmdd hh24:mi:ss'));
commit;

SELECT * FROM xcp ORDER BY begin_time

image5sql如下:

----問題2
WITH tmp1 AS (  --取所有時間節點    取17號就加入17號0點和24點兩個時間
SELECT channel,BEGIN_TIME TIME FROM xcp
UNION SELECT channel,end_time FROM xcp
UNION SELECT channel,MIN(begin_time) FROM xcp GROUP BY channel
UNION SELECT channel,MAX(end_time) FROM xcp GROUP BY channel
UNION SELECT DISTINCT channel,to_date('20200317','yyyymmdd') FROM xcp
UNION SELECT DISTINCT channel,to_date('20200318','yyyymmdd') FROM xcp),

tmp2 AS(--每個時間節點連接到下個節點  形成時間段
SELECT a.channel,a.time,LEAD(a.time,1) OVER(PARTITION BY a.channel ORDER BY a.time) nexttime
FROM tmp1 a),

tmp3 AS(--每個時間段取中值
SELECT b.channel,b.TIME,b.nexttime,(b.nexttime-b.time)/2+b.time midtime
FROM tmp2 b
WHERE b.nexttime IS NOT NULL
AND to_char(b.TIME,'yyyymmdd')=20200317),

tmp4 AS(--若中值處於原始記錄中  則該段時間為通道開通時間 否則通道不開通
SELECT c.*,
CASE WHEN EXISTS (SELECT 1 FROM xcp o WHERE c.midtime BETWEEN o.begin_time AND o.end_time) THEN 1 ELSE 0 END *
(c.nexttime-c.time)*24 duration
FROM tmp3 c)

SELECT nvl(d.channel,'合計時長') 通道,d.TIME 開始時間,d.nexttime 結束時間,
SUM(duration) "通道開通時間(小時)" FROM tmp4 d
GROUP BY rollup((d.channel,d.TIME,d.nexttime))
ORDER BY 2;

image思路是在第一步取時間節點的時候單獨加入17日0點24點的時間點即可

優化:

上述代碼全表掃描5次,效率垃圾,小強優化到一次掃描搞定了,代碼

--第8的特征:下一條記錄開始時間  大於  截止當前行的最大結束時間;那么就把這部分時間記下來,最后減掉即可
select (max(end_time) - min(begin_time)) * 24 -
       sum(decode(sign(next_begin_time - max_end_time),
                  1,
                  (next_begin_time - max_end_time) * 24,
                  0)) 通道開通時間
  from (select a.channel,
               a.begin_time,
               a.end_time,
               max(a.end_time) over(partition by a.channel order by a.begin_time rows between unbounded preceding and current row) max_end_time, --截止當前行的最大結束時間
               lead(a.begin_time, 1) over(partition by a.channel order by a.begin_time) next_begin_time --下一條記錄的開始時間
          from xcp a) tmp;

我用plsql也優化了一個出來

/*思路:    
第一步:兩兩合並,兩條記錄之間的關系只有兩種:有交集 和 無交集
        1)對於有交集的:兩兩合並,取MIN(begin_time),MAX(end_time)作為新記錄,
        2)對於無交集的:同樣取MIN(begin_time),MAX(end_time)作為新記錄,不過把中間空白部分計入duration_del
第二步:然后將第一步合並的新紀錄和下一條記錄再兩兩合並,以此類推,直至合並完所有記錄
第三步:結果就是 最終合並記錄的  end_time-begin_time-duration_del*/
DECLARE
    duration_del NUMBER:=0;--存儲無交集的兩兩記錄之間的空白時間
    --用於存儲合並后的時間
    begin_time_merge DATE; end_time_merge DATE;
    --用於輸入要查詢的時間段
    day1 DATE:=to_date(20200314,'yyyymmdd');
    day2 DATE:=to_date(20200330,'yyyymmdd');
BEGIN
    FOR i IN (SELECT ROWNUM rnow,aa.* FROM 
                          (SELECT a.channel,GREATEST(a.begin_time, day1) begin_time,LEAST(a.end_time,day2) end_time
                            FROM xcp a WHERE NOT (end_time < day1 OR  begin_time> day2) ORDER BY 2)aa
                    )LOOP  --掃描一次全表
        IF i.rnow=1 THEN   --第一條記錄用於初始化begin_time_merge  end_time_merge
            begin_time_merge :=i.begin_time; end_time_merge:=i.end_time;
        ELSE 
             IF i.begin_time>end_time_merge THEN
                duration_del:= duration_del+ (i.begin_time-end_time_merge)*24;--空白部分計入duration_del
             END IF;
             end_time_merge := GREATEST(end_time_merge,i.end_time); 
        END IF;        
    END LOOP;  
     DBMS_OUTPUT.PUT_LINE((end_time_merge-begin_time_merge)*24-duration_del||'個小時通道開放');
END;
/

就這樣 以備后查


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM