1. 程式人生 > >聯合主鍵 去重複記錄的方法

聯合主鍵 去重複記錄的方法

方法1、
就是將三個欄位連線起來作為主鍵,進行資料是否重複的判斷。這裡值得注意的是某列的值可能為空,所以要賦一個空字串過去:
select * from t1 where nvl(col1,'')|| nvl(col2,'')|| col3 not in (select col1||col2||col3 from t2)
方法2、
統計多表聯合查詢 去重複記錄的方法
兩表結構不一樣,或者一樣的,多個表結合起來查詢的,都可以
1. --> 生成測試資料表:a
2.
3. IF NOT OBJECT_ID('[a]') IS NULL
4. DROP TABLE [a]
5. GO
6. CREATE TABLE [a]([id] INT,[name] NVARCHAR(10),[parentid] INT)
7. INSERT [a]
8. SELECT 1,'area',0 UNION ALL
9. SELECT 2,'category',0 UNION ALL
10. SELECT 3,'north',1 UNION ALL
11. SELECT 4,'south',1 UNION ALL
12. SELECT 5,'Shanghai',4 UNION ALL
13. SELECT 6,'Beijing',3 UNION ALL
14. SELECT 7,'pudong',5 UNION ALL
15. SELECT 8,'xuhui',5 UNION ALL
16. SELECT 9,'chaoyang',6 UNION ALL
17. SELECT 10,'desk',2 UNION ALL
18. SELECT 11,'chair',2 UNION ALL
19. SELECT 12,'bed',2
20. GO
21.
22. --> 生成測試資料表:b
23.
24. IF NOT OBJECT_ID('[b]') IS NULL
25. DROP TABLE [b]
26. GO
27. CREATE TABLE [b]([id] INT,[area] INT,[city] INT,[district] NVARCHAR(10))
28. INSERT [b]
29. SELECT 1,4,5,'pudong' UNION ALL
30. SELECT 2,4,5,'xuhui' UNION ALL
31. SELECT 3,4,6,'chaoyang'
32. GO
33. --> 生成測試資料表:c
34.
35. IF NOT OBJECT_ID('[c]') IS NULL
36. DROP TABLE [c]
37. GO
38. CREATE TABLE [c]([id] INT,[category] INT,[area] INT,[city] INT,[district] INT)
39. INSERT [c]
40. SELECT 1,10,4,5,7 UNION ALL
41. SELECT 2,10,4,5,7 UNION ALL
42. SELECT 3,11,4,5,8 UNION ALL
43. SELECT 4,11,3,6,9 UNION ALL
44. SELECT 5,10,3,6,9
45. GO
46.
47. -->SQL查詢如下:
48. --SELECT * FROM [a]
49. --SELECT * FROM [b]
50. --SELECT * FROM [c]
51.
52. -->SQL查詢如下:
53. select a0.name area,
54. a1.name city,
55. a2.name district,
56. '' address,
57. MAX(case a3.name when 'desk' then 數量 else 0 end) 桌子數量,
58. MAX(case a3.name when 'chair' then 數量 else 0 end) 椅子數量,
59. MAX(case a3.name when 'bed' then 數量 else 0 end) 床數量
60. from (
61. select category,area,city,[district],COUNT(1) 數量
62. from c
63. group by category,area,city,[district]
64. ) c
65. join a a0 on a0.id=c.area
66. join a a1 on a1.id=c.[city]
67. join a a2 on a2.id=c.district
68. join a a3 on a3.id=c.category
69. left join b on b.area=c.area and c.city=b.city
70. group by a0.name,a1.name,a2.name
71. /*
72. area city district address 桌子數量 椅子數量 床數量
73. ---------- ---------- ---------- ------- ----------- ----------- -----------
74. north Beijing chaoyang 1 1 0
75. south Shanghai pudong 2 0 0
76. south Shanghai xuhui 0 1 0
77.
78. (3 行受影響)
79. */
1. --處理表重複記錄(查詢和刪除)
2. /******************************************************************************************************************************************************
3. 1、Num、Name相同的重複值記錄,沒有大小關係只保留一條
4. 2、Name相同,ID有大小關係時,保留大或小其中一個記錄
5. 整理人:中國風(Roy)
6.
7. 日期:2008.06.06
8. ******************************************************************************************************************************************************/
9.
10. --1、用於查詢重複處理記錄(如果列沒有大小關係時2000用生成自增列和臨時表處理,SQL2005用row_number函式處理)
11.
12. --> --> (Roy)生成測試資料
13.
14. if not object_id('Tempdb..#T') is null
15. drop table #T
16. Go
17. Create table #T([ID] int,[Name] nvarchar(1),[Memo] nvarchar(2))
18. Insert #T
19. select 1,N'A',N'A1' union all
20. select 2,N'A',N'A2' union all
21. select 3,N'A',N'A3' union all
22. select 4,N'B',N'B1' union all
23. select 5,N'B',N'B2'
24. Go
25.
26.
27. --I、Name相同ID最小的記錄(推薦用1,2,3),方法3在SQl05時,效率高於1、2
28. 方法1:
29. Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID<a.ID)
30.
31. 方法2:
32. select a.* from #T a join (select min(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID
33.
34. 方法3:
35. select * from #T a where ID=(select min(ID) from #T where Name=a.Name)
36.
37. 方法4:
38. select a.* from #T a join #T b on a.Name=b.Name and a.ID>=b.ID group by a.ID,a.Name,a.Memo having count(1)=1
39.
40. 方法5:
41. select * from #T a group by ID,Name,Memo having ID=(select min(ID)from #T where Name=a.Name)
42.
43. 方法6:
44. select * from #T a where (select count(1) from #T where Name=a.Name and ID<a.ID)=0
45.
46. 方法7:
47. select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID)
48.
49. 方法8:
50. select * from #T a where ID!>all(select ID from #T where Name=a.Name)
51.
52. 方法9(注:ID為唯一時可用):
53. select * from #T a where ID in(select min(ID) from #T group by Name)
54.
55. --SQL2005:
56.
57. 方法10:
58. select ID,Name,Memo from (select *,min(ID)over(partition by Name) as MinID from #T a)T where ID=MinID
59.
60. 方法11:
61.
62. select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID) as MinID from #T a)T where MinID=1
63.
64. 生成結果:
65. /*
66. ID Name Memo
67. ----------- ---- ----
68. 1 A A1
69. 4 B B1
70.
71. (2 行受影響)
72. */
73.
74.
75. --II、Name相同ID最大的記錄,與min相反:
76. 方法1:
77. Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID>a.ID)
78.
79. 方法2:
80. select a.* from #T a join (select max(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID order by ID
81.
82. 方法3:
83. select * from #T a where ID=(select max(ID) from #T where Name=a.Name) order by ID
84.
85. 方法4:
86. select a.* from #T a join #T b on a.Name=b.Name and a.ID<=b.ID group by a.ID,a.Name,a.Memo having count(1)=1
87.
88. 方法5:
89. select * from #T a group by ID,Name,Memo having ID=(select max(ID)from #T where Name=a.Name)
90.
91. 方法6:
92. select * from #T a where (select count(1) from #T where Name=a.Name and ID>a.ID)=0
93.
94. 方法7:
95. select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID desc)
96.
97. 方法8:
98. select * from #T a where ID!<all(select ID from #T where Name=a.Name)
99.
100. 方法9(注:ID為唯一時可用):
101. select * from #T a where ID in(select max(ID) from #T group by Name)
102.
103. --SQL2005:
104.
105. 方法10:
106. select ID,Name,Memo from (select *,max(ID)over(partition by Name) as MinID from #T a)T where ID=MinID
107.
108. 方法11:
109. select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID desc) as MinID from #T a)T where MinID=1
110.
111. 生成結果2:
112. /*
113. ID Name Memo
114. ----------- ---- ----
115. 3 A A3
116. 5 B B2
117.
118. (2 行受影響)
119. */
1. --分組取其中某欄位最小,去重複
2. if object_id('[tb]') is not null drop table [tb]
3. go
4. create table [tb]([EID] varchar(2),[OID] varchar(2),[Value] int)
5. insert [tb]
6. select 'E1','O1',4 union all
7. select 'E2','O2',16 union all
8. select 'E3','O1',5 union all
9. select 'E4','O2',8 union all
10. select 'E5','O1',3 union all
11. select 'E6','O3',9
12.
13. select t1.* from tb t1
14. where EID = (
15. select top 1 t2. EID from tb t2
16. where t2.Value = (
17. select min(t3.Value) from tb t3
18. where t2.EID=t3.EID
19. ) and t1.OID=t2.OID
20. )
21. and t1.EID in ('E1','E2','E4')
1. --按某一欄位分組取最大(小)值所在行的資料
2. --(愛新覺羅.毓華(十八年風雨,守得冰山雪蓮花開) 2007-10-23於浙江杭州)
3. /*
4. 資料如下:
5. name val memo
6. a 2 a2(a的第二個值)
7. a 1 a1--a的第一個值
8. a 3 a3:a的第三個值
9. b 1 b1--b的第一個值
10. b 3 b3:b的第三個值
11. b 2 b2b2b2b2
12. b 4 b4b4
13. b 5 b5b5b5b5b5
14. */
15. --建立表並插入資料:
16. create table tb(name varchar(10),val int,memo varchar(20))
17. insert into tb values('a', 2, 'a2(a的第二個值)')
18. insert into tb values('a', 1, 'a1--a的第一個值')
19. insert into tb values('a', 3, 'a3:a的第三個值')
20. insert into tb values('b', 1, 'b1--b的第一個值')
21. insert into tb values('b', 3, 'b3:b的第三個值')
22. insert into tb values('b', 2, 'b2b2b2b2')
23. insert into tb values('b', 4, 'b4b4')
24. insert into tb values('b', 5, 'b5b5b5b5b5')
25. go
26.
27. --一、按name分組取val最大的值所在行的資料。
28. --方法1:
29. select a.* from tb a where val = (select max(val) from tb where name = a.name) order by a.name
30. --方法2:
31. select a.* from tb a where not exists(select 1 from tb where name = a.name and val > a.val)
32. --方法3:
33. select a.* from tb a,(select name,max(val) val from tb group by name) b where a.name = b.name and a.val = b.val order by a.name
34. --方法4:
35. select a.* from tb a inner join (select name , max(val) val from tb group by name) b on a.name = b.name and a.val = b.val order by a.name
36. --方法5
37. select a.* from tb a where 1 > (select count(*) from tb where name = a.name and val > a.val ) order by a.name
38. /*
39. name val memo
40. ---------- ----------- --------------------
41. a 3 a3:a的第三個值
42. b 5 b5b5b5b5b5
43. */
44.
45. --二、按name分組取val最小的值所在行的資料。
46. --方法1:
47. select a.* from tb a where val = (select min(val) from tb where name = a.name) order by a.name
48. --方法2:
49. select a.* from tb a where not exists(select 1 from tb where name = a.name and val < a.val)
50. --方法3:
51. select a.* from tb a,(select name,min(val) val from tb group by name) b where a.name = b.name and a.val = b.val order by a.name
52. --方法4:
53. select a.* from tb a inner join (select name , min(val) val from tb group by name) b on a.name = b.name and a.val = b.val order by a.name
54. --方法5
55. select a.* from tb a where 1 > (select count(*) from tb where name = a.name and val < a.val) order by a.name
56. /*
57. name val memo
58. ---------- ----------- --------------------
59. a 1 a1--a的第一個值
60. b 1 b1--b的第一個值
61. */
62.
63. --三、按name分組取第一次出現的行所在的資料。
64. select a.* from tb a where val = (select top 1 val from tb where name = a.name) order by a.name
65. /*
66. name val memo
67. ---------- ----------- --------------------
68. a 2 a2(a的第二個值)
69. b 1 b1--b的第一個值
70. */
71.
72. --四、按name分組隨機取一條資料。
73. select a.* from tb a where val = (select top 1 val from tb where name = a.name order by newid()) order by a.name
74. /*
75. name val memo
76. ---------- ----------- --------------------
77. a 1 a1--a的第一個值
78. b 5 b5b5b5b5b5
79. */
80.
81. --五、按name分組取最小的兩個(N個)val
82. select a.* from tb a where 2 > (select count(*) from tb where name = a.name and val < a.val ) order by a.name,a.val
83. select a.* from tb a where val in (select top 2 val from tb where name=a.name order by val) order by a.name,a.val
84. select a.* from tb a where exists (select count(*) from tb where name = a.name and val < a.val having Count(*) < 2) order by a.name,a.val
85. /*
86. name val memo
87. ---------- ----------- --------------------
88. a 1 a1--a的第一個值
89. a 2 a2(a的第二個值)
90. b 1 b1--b的第一個值
91. b 2 b2b2b2b2
92. */
93.
94. --六、按name分組取最大的兩個(N個)val
95. select a.* from tb a where 2 > (select count(*) from tb where name = a.name and val > a.val ) order by a.name,a.val
96. select a.* from tb a where val in (select top 2 val from tb where name=a.name order by val desc) order by a.name,a.val
97. select a.* from tb a where exists (select count(*) from tb where name = a.name and val > a.val having Count(*) < 2) order by a.name , a.val
98. /*
99. name val memo
100. ---------- ----------- --------------------
101. a 2 a2(a的第二個值)
102. a 3 a3:a的第三個值
103. b 4 b4b4
104. b 5 b5b5b5b5b5
105. */
106. --七,如果整行資料有重複,所有的列都相同。
107. /*
108. 資料如下:
109. name val memo
110. a 2 a2(a的第二個值)
111. a 1 a1--a的第一個值
112. a 1 a1--a的第一個值
113. a 3 a3:a的第三個值
114. a 3 a3:a的第三個值
115. b 1 b1--b的第一個值
116. b 3 b3:b的第三個值
117. b 2 b2b2b2b2
118. b 4 b4b4
119. b 5 b5b5b5b5b5
120. */
121. --在sql server 2000中只能用一個臨時表來解決,生成一個自增列,先對val取最大或最小,然後再通過自增列來取資料。
122. --建立表並插入資料:
123. create table tb(name varchar(10),val int,memo varchar(20))
124. insert into tb values('a', 2, 'a2(a的第二個值)')
125. insert into tb values('a', 1, 'a1--a的第一個值')
126. insert into tb values('a', 1, 'a1--a的第一個值')
127. insert into tb values('a', 3, 'a3:a的第三個值')
128. insert into tb values('a', 3, 'a3:a的第三個值')
129. insert into tb values('b', 1, 'b1--b的第一個值')
130. insert into tb values('b', 3, 'b3:b的第三個值')
131. insert into tb values('b', 2, 'b2b2b2b2')
132. insert into tb values('b', 4, 'b4b4')
133. insert into tb values('b', 5, 'b5b5b5b5b5')
134. go
135.
136. select * , px = identity(int,1,1) into tmp from tb
137.
138. select m.name,m.val,m.memo from
139. (
140. select t.* from tmp t where val = (select min(val) from tmp where name = t.name)
141. ) m where px = (select min(px) from
142. (
143. select t.* from tmp t where val = (select min(val) from tmp where name = t.name)
144. ) n where n.name = m.name)
145.
146. drop table tb,tmp
147.
148. /*
149. name val memo
150. ---------- ----------- --------------------
151. a 1 a1--a的第一個值
152. b 1 b1--b的第一個值
153.
154. (2 行受影響)
155. */
156. --在sql server 2005中可以使用row_number函式,不需要使用臨時表。
157. --建立表並插入資料:
158. create table tb(name varchar(10),val int,memo varchar(20))
159. insert into tb values('a', 2, 'a2(a的第二個值)')
160. insert into tb values('a', 1, 'a1--a的第一個值')
161. insert into tb values('a', 1, 'a1--a的第一個值')
162. insert into tb values('a', 3, 'a3:a的第三個值')
163. insert into tb values('a', 3, 'a3:a的第三個值')
164. insert into tb values('b', 1, 'b1--b的第一個值')
165. insert into tb values('b', 3, 'b3:b的第三個值')
166. insert into tb values('b', 2, 'b2b2b2b2')
167. insert into tb values('b', 4, 'b4b4')
168. insert into tb values('b', 5, 'b5b5b5b5b5')
169. go
170.
171. select m.name,m.val,m.memo from
172. (
173. select * , px = row_number() over(order by name , val) from tb
174. ) m where px = (select min(px) from
175. (
176. select * , px = row_number() over(order by name , val) from tb
177. ) n where n.name = m.name)
178.
179. drop table tb
180.
181. /*
182. name val memo
183. ---------- ----------- --------------------
184. a 1 a1--a的第一個值
185. b 1 b1--b的第一個值
186.
187. (2 行受影響)
188. */