-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRemoving Duplicates.sql
80 lines (64 loc) · 1.5 KB
/
Removing Duplicates.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
-- DATA CLEANING
-- All clean-up below runs against a staging copy, so the source table
-- `layoffs` itself is only ever read.

-- Preview the source table.
SELECT *
  FROM layoffs;

-- Create an empty staging table with the same definition as `layoffs`.
CREATE TABLE layoff_staging LIKE layoffs;

-- Preview the staging table (no rows yet at this point).
SELECT *
  FROM layoff_staging;

-- Copy every row of `layoffs` into the staging table.
INSERT INTO layoff_staging
SELECT *
  FROM layoffs;
-- REMOVING DUPLICATES
-- Number the rows inside each (company, industry, total_laid_off, date) group.
-- A row numbered above 1 repeats an earlier row on those four columns.
-- `date` must be backtick-quoted: written as 'date' it is a string constant,
-- identical for every row, so the date would not take part in the grouping.
SELECT *,
       ROW_NUMBER() OVER (
           PARTITION BY company, industry, total_laid_off, `date`
       ) AS row_num
FROM layoff_staging;
-- CREATING CTE
-- List the surplus copies: rows that match an earlier row on all nine columns
-- named in the PARTITION BY. `date` is quoted as an identifier; as the string
-- literal 'date' it would be a constant and rows differing only by date would
-- be reported as duplicates.
WITH duplicate_cte AS
(
    SELECT *,
           ROW_NUMBER() OVER (
               PARTITION BY company, location, industry, total_laid_off,
                            percentage_laid_off, `date`, stage, country,
                            funds_raised_millions
           ) AS row_num
    FROM layoff_staging
)
SELECT *
FROM duplicate_cte
WHERE row_num > 1;

-- Spot check: look at every row of one company to confirm by eye which rows
-- are genuine duplicates.
SELECT *
FROM layoff_staging
WHERE company = 'casper';
-- NOTE: the duplicates cannot be deleted through the CTE.
-- MySQL parses WITH ... DELETE, but a CTE is not an updatable target, so
--     WITH duplicate_cte AS (...) DELETE FROM duplicate_cte WHERE row_num > 1;
-- is rejected with an error and removes nothing. layoff_staging also has no
-- column that tells two identical rows apart, so a direct DELETE on it cannot
-- keep exactly one copy either. The surplus rows are therefore removed via
-- layoffs_staging2, a copy that stores row_num as a real column.
-- Second staging table: the columns of layoff_staging plus row_num, so the
-- duplicate counter is stored in a real column that a DELETE can filter on.
-- The name is all lower case to match every later reference; on servers with
-- case-sensitive table names `Layoffs_staging2` would be a different table.
CREATE TABLE layoffs_staging2 (
    company               text,
    location              text,
    industry              text,
    total_laid_off        int DEFAULT NULL,
    percentage_laid_off   text,
    `date`                text,
    stage                 text,
    country               text,
    funds_raised_millions int DEFAULT NULL,
    row_num               int
);

-- Preview the new table (no rows yet at this point).
SELECT *
FROM layoffs_staging2;

-- Copy every staging row and number it within its group of fully identical
-- rows. Columns are listed explicitly so the load does not depend on column
-- order. `date` is quoted as an identifier: as the string literal 'date' it
-- would be a constant, and rows differing only by date would be numbered
-- above 1 and then deleted below.
INSERT INTO layoffs_staging2 (
    company,
    location,
    industry,
    total_laid_off,
    percentage_laid_off,
    `date`,
    stage,
    country,
    funds_raised_millions,
    row_num
)
SELECT
    company,
    location,
    industry,
    total_laid_off,
    percentage_laid_off,
    `date`,
    stage,
    country,
    funds_raised_millions,
    ROW_NUMBER() OVER (
        PARTITION BY company, location, industry, total_laid_off,
                     percentage_laid_off, `date`, stage, country,
                     funds_raised_millions
    ) AS row_num
FROM layoff_staging;

-- Review the surplus copies before removing them.
SELECT *
FROM layoffs_staging2
WHERE row_num > 1;

-- Keep the first row of each group and drop every additional copy.
DELETE
FROM layoffs_staging2
WHERE row_num > 1;
-- DUPLICATES ARE REMOVED!