彻底理解删除重复记录,只保留一条
Posted 清_澈
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了彻底理解删除重复记录,只保留一条相关的知识,希望对你有一定的参考价值。
以 Oracle 数据库为例:
-- 1. Create the demo table (Oracle syntax).
-- Changes from the original export:
--   * ID gets a named PRIMARY KEY constraint, because its column comment
--     describes it as the primary key but nothing enforced that.
--   * TABLESPACE PLATEDATA and the STORAGE clause are omitted: they are
--     specific to one database and make the script fail (ORA-00959) wherever
--     that tablespace does not exist. The table now goes to the user's
--     default tablespace; add a TABLESPACE clause back if you need one.
--   * MAXTRANS is omitted because Oracle has deprecated it.
CREATE TABLE demo
(
    id          INTEGER,
    name        VARCHAR2(50),
    idcard      VARCHAR2(18),
    amt         NUMBER(*,2),
    end_date    DATE,
    update_time TIMESTAMP(6),
    CONSTRAINT demo_pk PRIMARY KEY (id)
)
PCTFREE 10
INITRANS 1;
-- Column comments stored in the data dictionary (text kept as in the original).
COMMENT ON COLUMN demo.id
    IS '主键';
COMMENT ON COLUMN demo.name
    IS '姓名';
COMMENT ON COLUMN demo.idcard
    IS '身份证号';
COMMENT ON COLUMN demo.amt
    IS '金额';
COMMENT ON COLUMN demo.end_date
    IS '截止时间';
COMMENT ON COLUMN demo.update_time
    IS '更新时间';
-- 2. Load the sample rows.
-- IDCARD 370305197812097560 appears twice (IDs 2 and 3); the row for '齐白石'
-- (ID 2) is the duplicate that the later steps are meant to remove.
-- UPDATE_TIME is set with an explicit NULL. The original passed '', which only
-- works because Oracle treats an empty string as NULL and then converts it
-- implicitly to TIMESTAMP.
-- END_DATE uses ANSI date literals, which do not depend on a format mask.
INSERT INTO demo (id, name, idcard, amt, end_date, update_time)
VALUES (2, '齐白石', '370305197812097560', 50000.00, DATE '2022-11-16', NULL);
INSERT INTO demo (id, name, idcard, amt, end_date, update_time)
VALUES (1, '苏东坡', '370300198211118113', 20000.00, DATE '2022-11-17', NULL);
INSERT INTO demo (id, name, idcard, amt, end_date, update_time)
VALUES (3, '鲁赤水', '370305197812097560', 40000.00, DATE '2022-11-30', NULL);
INSERT INTO demo (id, name, idcard, amt, end_date, update_time)
VALUES (4, '张大千', '370302195602068945', 26000.00, DATE '2022-11-17', NULL);
-- 3. Inspect the data for duplicates.
-- Every row currently in the table.
SELECT d.id, d.name, d.idcard, d.amt, d.end_date, d.update_time
FROM demo d;
-- One row per distinct IDCARD: the row count expected after de-duplication.
SELECT DISTINCT d.idcard
FROM demo d;
-- Distinct (NAME, IDCARD, END_DATE) combinations.
SELECT DISTINCT d.name, d.idcard, d.end_date
FROM demo d;
-- IDCARD values that occur on more than one row, i.e. the duplicated ID numbers.
SELECT d.idcard
FROM demo d
GROUP BY d.idcard
HAVING COUNT(d.idcard) > 1;
-- List the redundant rows: for each IDCARD, every row except the one with the
-- latest END_DATE (ties on END_DATE are broken by the highest ID).
-- The original used END_DATE NOT IN (the set of per-IDCARD MAX(END_DATE)).
-- That comparison is not tied to the row's own IDCARD, so it had three flaws:
--   * a redundant row was missed when its END_DATE equalled the latest
--     END_DATE of some other IDCARD;
--   * rows sharing the same latest END_DATE were never reported;
--   * a single NULL in the subquery result made NOT IN return no rows at all.
-- Ranking the rows inside each IDCARD avoids all three problems.
SELECT ranked.id, ranked.name, ranked.idcard, ranked.amt, ranked.end_date, ranked.update_time
FROM (
    SELECT d.id, d.name, d.idcard, d.amt, d.end_date, d.update_time,
           ROW_NUMBER() OVER (
               PARTITION BY d.idcard
               ORDER BY d.end_date DESC NULLS LAST, d.id DESC
           ) AS rn
    FROM demo d
) ranked
WHERE ranked.rn > 1;
-- 4. Delete duplicate rows, keeping exactly one row per IDCARD.
-- The original put all three statements on one physical line that started
-- with "--", so the whole line was a comment and nothing was executed.
-- Options A, B and C are alternatives: pick the rule you want and run that one.
-- Once one of them has left a single row per IDCARD, the others delete nothing.
-- Check the result with a SELECT before you COMMIT.
--
-- Option A: keep the row with the largest ID for each IDCARD.
-- Assumes ID is unique and not NULL.
DELETE FROM demo
WHERE id NOT IN (
    SELECT MAX(d.id)
    FROM demo d
    GROUP BY d.idcard
);
-- Option B: keep the row with the latest END_DATE for each IDCARD
-- (ties on END_DATE are broken by the highest ID).
-- The original compared END_DATE against the latest dates of ALL IDCARDs,
-- which keeps a duplicate whose END_DATE matches another IDCARD's latest date.
-- Ranking per IDCARD and deleting by ROWID removes exactly the surplus rows.
DELETE FROM demo
WHERE ROWID IN (
    SELECT ranked.rid
    FROM (
        SELECT d.ROWID AS rid,
               ROW_NUMBER() OVER (
                   PARTITION BY d.idcard
                   ORDER BY d.end_date DESC NULLS LAST, d.id DESC
               ) AS rn
        FROM demo d
    ) ranked
    WHERE ranked.rn > 1
);
-- Option C: correlated form of the statement the author reported as having no
-- effect. The original read: IDCARD NOT IN (IDCARDs of the rows that hold the
-- latest END_DATE). Every IDCARD has such a row, so the subquery returned every
-- IDCARD and NOT IN was false for all rows. The condition must compare each row
-- with the latest END_DATE of its own IDCARD.
-- Note: rows tied on the latest END_DATE, and rows whose END_DATE or IDCARD is
-- NULL, are left in place by this form; use option B when that matters.
DELETE FROM demo d
WHERE d.end_date < (
    SELECT MAX(newer.end_date)
    FROM demo newer
    WHERE newer.idcard = d.idcard
);
以上是关于彻底理解删除重复记录,只保留一条的主要内容,如果未能解决你的问题,请参考以下文章