16. 窗口函數 (Window Function) 的使用


SQL Server 自 2005 版起開始支持窗口函數 (Window Function),到 SQL Server 2012 又進一步增強了窗口函數的功能。目前為止支持以下幾種窗口函數:

1. 排序函數 (Ranking Function) ;

2. 聚合函數 (Aggregate Function) ;

3. 分析函數 (Analytic Function) ;

4. NEXT VALUE FOR Function, 這是給sequence專用的一個函數;

 

1. 排序函數 (Ranking Function)

幫助文檔里的代碼示例很全。

排序函數中,ROW_NUMBER()較為常用,可用於去重、分頁、分組中選擇數據,生成數字輔助表等等;

排序函數在語法上要求OVER子句里必須含ORDER BY,否則語法不通過;對於不想排序的場景,可以像下面這樣變通:

-- Ranking-function demo: ROW_NUMBER() requires an ORDER BY inside OVER().
-- The fixture holds four rows; id = 1 appears twice so that PARTITION BY id
-- produces one partition with more than one row.
DROP TABLE IF EXISTS test_ranking;

CREATE TABLE test_ranking
(
    id    int         NOT NULL,
    name  varchar(20) NOT NULL,
    value int         NOT NULL
);

INSERT INTO test_ranking (id, name, value)
VALUES
    (1, 'name1', 1),
    (1, 'name2', 2),
    (2, 'name3', 2),
    (3, 'name4', 2);

-- Number the rows inside each id partition, ordered by name.
SELECT id, name, ROW_NUMBER() OVER (PARTITION BY id ORDER BY name) AS num
FROM test_ranking;
GO

-- Intentionally invalid: OVER() has no ORDER BY, so SQL Server rejects the
-- statement. It is kept in its own batch (GO before and after) so that the
-- error cannot prevent the surrounding examples from running.
SELECT id, name, ROW_NUMBER() OVER (PARTITION BY id) AS num
FROM test_ranking;
/*
Msg 4112, Level 15, State 1, Line 1
The function 'ROW_NUMBER' must have an OVER clause with ORDER BY.
*/
GO

-- Workaround when no particular order is wanted: put an expression that is
-- unrelated to the table's columns after ORDER BY.
SELECT id, name, ROW_NUMBER() OVER (PARTITION BY id ORDER BY GETDATE()) AS num
FROM test_ranking;

SELECT id, name, ROW_NUMBER() OVER (PARTITION BY id ORDER BY (SELECT 0)) AS num
FROM test_ranking;

 

2. 聚合函數 (Aggregate Function)

SQL Server 2005中,窗口聚合函數僅支持PARTITION BY,也就是說僅能對分組的數據整體做聚合運算;

SQL Server 2012開始,窗口聚合函數支持ORDER BY,以及ROWS/RANGE選項,原本需要子查詢來實現的需求,如: 移動平均 (moving averages), 總計聚合 (cumulative aggregates), 累計求和 (running totals) 等,變得更加方便;

 

代碼示例1:總計/小計/累計求和

-- Fixture for the aggregate window-function examples: two events (a, b),
-- each with three rows numbered by rk. Event a has a duplicate price (10),
-- which the RANGE-vs-ROWS examples rely on.
DROP TABLE IF EXISTS test_aggregate;

CREATE TABLE test_aggregate
(
    event_id varchar(100),
    rk       int,
    price    int
);

INSERT INTO test_aggregate (event_id, rk, price)
VALUES
    ('a', 1, 10),
    ('a', 2, 10),
    ('a', 3, 50),
    ('b', 1, 10),
    ('b', 2, 20),
    ('b', 3, 30);


-- 1. Without window functions: a correlated subquery sums every row of the
--    same event whose rk is not greater than the current row's rk.
SELECT a.event_id,
       a.rk,      -- build a ranking column first if the table has none
       a.price,
       (SELECT SUM(b.price)
          FROM test_aggregate AS b
         WHERE b.event_id = a.event_id
           AND b.rk <= a.rk) AS totalprice
FROM test_aggregate AS a;


-- 2. From SQL Server 2012 on: window aggregates.
-- 2.1
-- No PARTITION BY, no ORDER BY   -> grand total over all rows.
-- PARTITION BY only              -> subtotal per group.
-- ORDER BY only, no PARTITION BY -> running total over all rows.
-- rk repeats across events, so ORDER BY rk with the default RANGE frame would
-- give tied rows the same total. The running total therefore orders by the
-- unique (event_id, rk) pair and uses an explicit ROWS frame so that every
-- row gets its own cumulative value.
SELECT *,
       SUM(price) OVER () AS TotalPrice,
       SUM(price) OVER (PARTITION BY event_id) AS SubTotalPrice,
       SUM(price) OVER (ORDER BY event_id, rk
                        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS RunningTotalPrice
FROM test_aggregate AS a;

-- 2.2 The column chosen for ORDER BY inside OVER() can change the result.

-- Ordered by rk, which has no duplicates within an event: the total grows
-- one row at a time.
SELECT
    *,
    SUM(price) OVER (PARTITION BY event_id ORDER BY rk) AS totalprice
FROM test_aggregate AS a;
/*
event_id    rk    price    totalprice
a    1    10    10
a    2    10    20
a    3    50    70
b    1    10    10
b    2    20    30
b    3    30    60
*/

-- Ordered by price, which has a duplicate (10) in event a: both tied rows
-- receive the same total.
SELECT
    *,
    SUM(price) OVER (PARTITION BY event_id ORDER BY price) AS totalprice
FROM test_aggregate AS a;
/*
event_id    rk    price    totalprice
a    1    10    20
a    2    10    20
a    3    50    70
b    1    10    10
b    2    20    30
b    3    30    60
*/

-- Reason: ORDER BY accepts a ROWS/RANGE frame option, and when none is given
-- the default is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
-- RANGE works on the ORDER BY values, so all rows that share the current
-- row's value are treated as part of the "current row".
-- The two statements below are therefore equivalent.
SELECT
    *,
    SUM(price) OVER (PARTITION BY event_id ORDER BY price) AS totalprice
FROM test_aggregate AS a;

SELECT
    *,
    SUM(price) OVER (
        PARTITION BY event_id
        ORDER BY price
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS totalprice
FROM test_aggregate AS a;

-- When the ORDER BY column contains duplicates, switch to ROWS explicitly to
-- accumulate strictly row by row.
SELECT
    *,
    SUM(price) OVER (
        PARTITION BY event_id
        ORDER BY price
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS totalprice
FROM test_aggregate AS a;

 

代碼示例2:移動平均

-- Moving average: for example the average over the previous N days, similar
-- to the moving-average lines used on stock charts.
DROP TABLE IF EXISTS test_moving_avg;

CREATE TABLE test_moving_avg
(
    ID    int,
    Value int,
    DT    datetime
);

-- One row per day for the ten days before today.
INSERT INTO test_moving_avg (ID, Value, DT)
VALUES
    (1,  10,  GETDATE() - 10),
    (2,  110, GETDATE() - 9),
    (3,  100, GETDATE() - 8),
    (4,  80,  GETDATE() - 7),
    (5,  60,  GETDATE() - 6),
    (6,  40,  GETDATE() - 5),
    (7,  30,  GETDATE() - 4),
    (8,  50,  GETDATE() - 3),
    (9,  20,  GETDATE() - 2),
    (10, 10,  GETDATE() - 1);

-- 1. Without window functions: a correlated subquery averages the rows that
--    fall in the five days before the current row (current row excluded).
--    Value is int and AVG over int returns a truncated int, so the value is
--    cast to decimal to keep the fractional part of the average.
SELECT *,
       (SELECT AVG(CAST(a.Value AS decimal(10, 2)))
          FROM test_moving_avg AS a
         WHERE a.DT >= DATEADD(DAY, -5, b.DT)
           AND a.DT < b.DT) AS avg_value_5days
FROM test_moving_avg AS b;

-- 2. From SQL Server 2012 on: window functions.
-- Three built-in frame bounds mark the first row, the last row and the
-- current row: UNBOUNDED PRECEDING, UNBOUNDED FOLLOWING, CURRENT ROW.
-- To move relative to the current row use
-- BETWEEN m PRECEDING AND n FOLLOWING, where m and n are non-negative
-- integer literals.
--   moving_sum  : current row plus the 5 rows before it (6 rows)
--   moving_avg1 : current row plus the 4 rows before it (5 rows)
--   moving_avg2 : the 5 rows before the current row, current row excluded
--   moving_avg3 : 3 rows before through 1 row after the current row
SELECT *,
       SUM(Value) OVER (ORDER BY DT ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS moving_sum,
       AVG(CAST(Value AS decimal(10, 2))) OVER (ORDER BY DT ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS moving_avg1,
       AVG(CAST(Value AS decimal(10, 2))) OVER (ORDER BY DT ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS moving_avg2,
       AVG(CAST(Value AS decimal(10, 2))) OVER (ORDER BY DT ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS moving_avg3
FROM test_moving_avg
ORDER BY DT;

 

3. 分析函數 (Analytic Function)

代碼示例1:取當前行某列的前一個/下一個值

-- Fixture for the LAG/LEAD examples: one revenue row per sales year, plus an
-- Offset column that can be passed to LAG/LEAD as the row distance.
DROP TABLE IF EXISTS test_analytic;

CREATE TABLE test_analytic
(
    SalesYear varchar(10),
    Revenue   int,
    [Offset]  int
);

INSERT INTO test_analytic (SalesYear, Revenue, [Offset])
VALUES
    (2013, 1001, 1),
    (2014, 1002, 1),
    (2015, 1003, 1),
    (2016, 1004, 1),
    (2017, 1005, 1),
    (2018, 1006, 1);

-- Revenue of the current year and of the previous year.
-- Three equivalent forms: LAG with a literal offset, LAG with the offset
-- taken from a column, and LEAD over the reversed ordering.
SELECT *,
       LAG(Revenue, 1, NULL) OVER (ORDER BY SalesYear ASC) AS PreviousYearRevenue
FROM test_analytic;

SELECT *,
       LAG(Revenue, [Offset], NULL) OVER (ORDER BY SalesYear ASC) AS PreviousYearRevenue
FROM test_analytic;

SELECT *,
       LEAD(Revenue, 1, NULL) OVER (ORDER BY SalesYear DESC) AS PreviousYearRevenue
FROM test_analytic;

-- Revenue of the current year and of the next year, in the same three forms.
SELECT *,
       LEAD(Revenue, 1, NULL) OVER (ORDER BY SalesYear ASC) AS NextYearRevenue
FROM test_analytic;

SELECT *,
       LEAD(Revenue, [Offset], NULL) OVER (ORDER BY SalesYear ASC) AS NextYearRevenue
FROM test_analytic;

SELECT *,
       LAG(Revenue, 1, NULL) OVER (ORDER BY SalesYear DESC) AS NextYearRevenue
FROM test_analytic;

-- Change the offset argument to look further back or ahead.

 

代碼示例2:分組中某列最大/最小值,對應的其他列值

假設有個門禁系統,在員工每次進門時寫入一條記錄,記錄了“身份號碼”,“進門時間”,“衣服顏色”,查詢每個員工最后一次進門時的“衣服顏色”。

-- Fixture: one row per door entry, recording who entered, when, and the
-- colour of the clothes worn. Goal: the clothes colour at each employee's
-- last (and first) entry.
DROP TABLE IF EXISTS test_first_last;

CREATE TABLE test_first_last
(
    EmployeeID     int,
    EnterTime      datetime,
    ColorOfClothes varchar(20)
);

INSERT INTO test_first_last (EmployeeID, EnterTime, ColorOfClothes)
VALUES
    (1001, GETDATE() - 9, 'GREEN'),
    (1001, GETDATE() - 8, 'RED'),
    (1001, GETDATE() - 7, 'YELLOW'),
    (1001, GETDATE() - 6, 'BLUE'),
    (1002, GETDATE() - 5, 'BLACK'),
    (1002, GETDATE() - 4, 'WHITE');

-- 1. Picking the last row per employee.
-- 1.1 Correlated subquery: keep a row only if no later entry exists for the
--     same employee.
SELECT *
FROM test_first_last AS a
WHERE NOT EXISTS (SELECT 1
                    FROM test_first_last AS b
                   WHERE a.EmployeeID = b.EmployeeID
                     AND a.EnterTime < b.EnterTime);

-- 1.2 Derived table with ROW_NUMBER(): number each employee's entries from
--     newest to oldest and keep number 1.
SELECT *
FROM (SELECT *,
             ROW_NUMBER() OVER (PARTITION BY EmployeeID ORDER BY EnterTime DESC) AS num
        FROM test_first_last) AS t
WHERE t.num = 1;


-- 2. Appending the first/last colour to every row with FIRST_VALUE/LAST_VALUE.
-- LAST_VALUE needs an explicit frame of
-- ROWS/RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING; the default frame
-- ends at the current row, so without it the result is not the partition's
-- last value.
-- Each output column has a distinct name so the result set is unambiguous:
-- the *ByLastValue columns return the same colours as the FIRST_VALUE ones.
SELECT *,
       FIRST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime DESC) AS LastColorOfClothes,
       FIRST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime ASC) AS FirstColorOfClothes,
       LAST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime ASC
                                        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS LastColorOfClothesByLastValue,
       LAST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime DESC
                                        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS FirstColorOfClothesByLastValue
FROM test_first_last;

-- FIRST_VALUE/LAST_VALUE are convenient when every row is returned with the
-- first/last value appended as extra columns.
-- The row-picking queries in section 1 are more convenient when only one or
-- a few rows per group should be returned.

 

4. NEXT VALUE FOR Function

-- Sequence used by the NEXT VALUE FOR examples: starts at 1, steps by 1.
DROP SEQUENCE IF EXISTS test_seq;

CREATE SEQUENCE test_seq
    START WITH 1
    INCREMENT BY 1;

GO

-- Demo table. Only Name is supplied on insert, so ID is left NULL.
DROP TABLE IF EXISTS test_next_value;

CREATE TABLE test_next_value
(
    ID   int,
    Name varchar(10)
);

INSERT INTO test_next_value (Name)
VALUES
    ('AAA'),
    ('AAA'),
    ('BBB'),
    ('CCC');

-- When NEXT VALUE FOR is evaluated over several rows, the sequence advances
-- once per row whether or not an OVER clause is present.
-- ORDER BY inside OVER() controls the order in which the rows receive their
-- numbers.
SELECT *, NEXT VALUE FOR test_seq
FROM test_next_value;

SELECT *, NEXT VALUE FOR test_seq OVER (ORDER BY Name DESC)
FROM test_next_value;

 

參考:

SELECT - OVER Clause (Transact-SQL)

https://docs.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-2017

SQL Server Windowing Functions: ROWS vs. RANGE

https://www.sqlpassion.at/archive/2015/01/22/sql-server-windowing-functions-rows-vs-range/


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM