Oracle數據去重
一、創建測試表
-- Demo table for the de-duplication examples below; "card" is the column
-- whose repeated values are treated as duplicates.
-- Lengths use CHAR semantics so that 20 means 20 characters. A bare
-- VARCHAR2(20) follows NLS_LENGTH_SEMANTICS (BYTE by default), which in a
-- multi-byte character set leaves room for only a few Chinese characters.
create table test3
(
  id      number,             -- row identifier; unique in the sample data, not enforced by a constraint
  name    varchar2(20 char),
  card    varchar2(20 char),  -- de-duplication key; deliberately allowed to repeat
  address varchar2(20 char),
  zone    varchar2(20 char)
);
二、插入測試數據
-- Sample rows. Card values repeat on purpose: ids 1 and 4 share one card,
-- ids 2 and 3 share another, and id 5 has a card of its own.
-- Column lists are spelled out so the inserts do not depend on the
-- physical column order of test3.
insert into test3 (id, name, card, address, zone)
values (1, '張三', '132111111111111111', '中國', '河南');
insert into test3 (id, name, card, address, zone)
values (2, '李四', '132111122222222211', '美國', '紐約');
insert into test3 (id, name, card, address, zone)
values (3, '王五', '132111122222222211', '英國', '倫敦');
insert into test3 (id, name, card, address, zone)
values (4, '趙六', '132111111111111111', '法國', '巴黎');
insert into test3 (id, name, card, address, zone)
values (5, '嚴七', '132111111333331111', '中國', '河北');
commit;
三、distinct去重
-- List each card value once. Only the card column is projected, so rows
-- are collapsed on card alone.
select distinct
       src.card
from   test3 src;
distinct去重局限性很大:distinct作用於select列表中的所有列,若要只按card去重,結果中就只能包含card這一列。
四、row_number()函數去重
以card字段分組,然後在分組內部按id號排序,序號從1開始遞增,沒有重複。只取序號為1的記錄,即完成去重。
查詢去重後的數據:
-- Keep one row per card: number the rows inside each card group by
-- ascending id and return only the first (lowest id) row of each group.
-- The helper column nu appears in the output because the outer query
-- selects every column of the inline view.
select ranked.*
from (select src.*,
             row_number() over (partition by src.card order by src.id) as nu
      from test3 src) ranked
where ranked.nu = 1;
row_number() over(partition by t1.card order by t1.id) as nu,這條語句使用t1.card字段進行分組,然後按t1.id在分組內部升序排序,序號從1開始,最後將排序後的序號賦值給nu字段。若改為order by t1.id desc,則每組保留id最大的記錄。
測試發現,如果沒有唯一字段(如ID字段),order by後面也可以寫一個常數(如1);但此時同一分組內保留哪一條記錄是不確定的,需要確定的結果時可改用rowid排序:
-- Keep one row per card when the table has no unique column to order by.
-- rowid serves as the tie-breaker, so for a given table state the
-- surviving row of each group is well defined. Ordering by a constant
-- such as 1 is also accepted inside an analytic clause (it is treated as
-- an expression, not a column position), but then every row of a group
-- ties and the survivor is arbitrary.
select ranked.*
from (select src.*,
             row_number() over (partition by src.card order by src.rowid) as nu
      from test3 src) ranked
where ranked.nu = 1;
五、查詢出所有重複的數據
group by方式:
-- Return every row whose card value occurs more than once in test3.
-- The subquery finds the repeated card values; the outer query fetches
-- the full rows that carry them.
select src.*
from test3 src
where src.card in (select dup.card
                   from test3 dup
                   group by dup.card
                   having count(*) > 1);
over分組方式:
-- Return every row whose card value occurs more than once, together
-- with the size of its card group in nu.
-- The window has no order by on purpose: without one, count(*) covers
-- the whole partition. With an order by the default frame ends at the
-- current row's peers, which yields the group size only when every row
-- in the group ties.
select counted.*
from (select src.*,
             count(*) over (partition by src.card) as nu
      from test3 src) counted
where counted.nu > 1;
六、刪除重複數據只保留一條
over分組方式:
-- Delete duplicate rows, keeping for each card the row with the lowest id.
-- row_number() is used instead of a running count(): rows that share an
-- id are peers and would all receive the same running count, so a tie
-- could delete every row of a group. rowid breaks such ties so exactly
-- one row per card survives.
-- The inner rowid is exposed under the alias rid so the outer query reads
-- an ordinary column rather than the ROWID pseudocolumn of an inline view.
-- The delete is not committed here.
delete from test3 tgt
where tgt.rowid in (select ranked.rid
                    from (select src.rowid as rid,
                                 row_number() over (partition by src.card
                                                    order by src.id, src.rowid) as rn
                          from test3 src) ranked
                    where ranked.rn > 1);
group by方式:
-- Delete duplicate rows, keeping for each card the row with the greatest
-- rowid (which need not be the row with the lowest id).
-- max(rowid) is never null for an existing group, so the NOT IN
-- comparison cannot be emptied by a null in the subquery result.
-- The delete is not committed here.
delete from test3 tgt
where tgt.rowid not in (select max(src.rowid)
                        from test3 src
                        group by src.card);