diff --git a/sql-queries-11/delete-identical-rows/create-table.sql b/sql-queries-11/delete-identical-rows/create-table.sql
new file mode 100644
index 00000000..f27a780e
--- /dev/null
+++ b/sql-queries-11/delete-identical-rows/create-table.sql
@@ -0,0 +1,20 @@
+-- Create sample table.
+-- RecordID is a surrogate key: it is the only column that tells otherwise
+-- identical rows apart.
+-- SERIAL is PostgreSQL syntax; use INT AUTO_INCREMENT for MySQL and
+-- INT IDENTITY(1,1) for MS SQL Server.
+CREATE TABLE DuplicateRecords (
+    RecordID SERIAL PRIMARY KEY,
+    Value1 VARCHAR(50),
+    Value2 INT,
+    Value3 DATE
+);
+
+-- Insert sample data with duplicates
+INSERT INTO DuplicateRecords (Value1, Value2, Value3) VALUES
+    ('Apple', 10, '2023-01-01'),
+    ('Banana', 20, '2023-02-01'),
+    ('Apple', 10, '2023-01-01'),   -- Identical to the first row (logical duplicate)
+    ('Cherry', 30, '2023-03-01'),
+    ('Banana', 20, '2023-02-01'),  -- Identical to the second row (logical duplicate)
+    ('Apple', 10, '2023-01-01');   -- Another identical row
diff --git a/sql-queries-11/delete-identical-rows/using-delete-with-self-join.sql b/sql-queries-11/delete-identical-rows/using-delete-with-self-join.sql
new file mode 100644
index 00000000..7fdfb0ad
--- /dev/null
+++ b/sql-queries-11/delete-identical-rows/using-delete-with-self-join.sql
@@ -0,0 +1,36 @@
+-- PostgreSQL
+-- Deletes every row for which an identical row with a smaller RecordID exists,
+-- so the lowest RecordID of each duplicate group is kept.
+-- IS NOT DISTINCT FROM treats two NULLs as equal; with plain "=" rows that
+-- contain a NULL would never match and their duplicates would survive.
+DELETE FROM DuplicateRecords DR1
+USING DuplicateRecords DR2
+WHERE
+    DR1.Value1 IS NOT DISTINCT FROM DR2.Value1 AND
+    DR1.Value2 IS NOT DISTINCT FROM DR2.Value2 AND
+    DR1.Value3 IS NOT DISTINCT FROM DR2.Value3 AND
+    DR1.RecordID > DR2.RecordID;
+
+-- MySQL
+-- <=> is MySQL's NULL-safe equality operator (NULL <=> NULL is true).
+DELETE FROM DR1
+USING DuplicateRecords DR1
+INNER JOIN DuplicateRecords DR2 ON
+    DR1.Value1 <=> DR2.Value1 AND
+    DR1.Value2 <=> DR2.Value2 AND
+    DR1.Value3 <=> DR2.Value3 AND
+    DR1.RecordID > DR2.RecordID;
+
+-- MS SQL Server
+-- INTERSECT compares NULLs as equal, so EXISTS (... INTERSECT ...) is a
+-- NULL-safe row comparison that does not require SQL Server 2022.
+-- On SQL Server 2022+ IS NOT DISTINCT FROM can be used instead.
+DELETE DR1
+FROM DuplicateRecords DR1
+INNER JOIN DuplicateRecords DR2
+    ON DR1.RecordID > DR2.RecordID
+    AND EXISTS (
+        SELECT DR1.Value1, DR1.Value2, DR1.Value3
+        INTERSECT
+        SELECT DR2.Value1, DR2.Value2, DR2.Value3
+    );
diff --git a/sql-queries-11/delete-identical-rows/using-min-max-subquery.sql b/sql-queries-11/delete-identical-rows/using-min-max-subquery.sql
new file mode 100644
index 00000000..0a311fca
--- /dev/null
+++ b/sql-queries-11/delete-identical-rows/using-min-max-subquery.sql
@@ -0,0 +1,21 @@
+-- PostgreSQL / MS SQL Server
+-- Keeps the row with the smallest RecordID in each (Value1, Value2, Value3)
+-- group and deletes the rest. GROUP BY places NULLs in the same group.
+DELETE FROM DuplicateRecords
+WHERE RecordID NOT IN (
+    SELECT MIN(RecordID)
+    FROM DuplicateRecords
+    GROUP BY Value1, Value2, Value3
+);
+
+-- MySQL
+-- MySQL rejects a subquery that reads the table being deleted from
+-- (error 1093), so the grouped result is wrapped in a derived table.
+DELETE FROM DuplicateRecords
+WHERE RecordID NOT IN (
+    SELECT T2.MinRecordID FROM (
+        SELECT MIN(RecordID) AS MinRecordID
+        FROM DuplicateRecords
+        GROUP BY Value1, Value2, Value3
+    ) AS T2
+);
diff --git a/sql-queries-11/delete-identical-rows/using-row-number.sql b/sql-queries-11/delete-identical-rows/using-row-number.sql
new file mode 100644
index 00000000..d9ebe45c
--- /dev/null
+++ b/sql-queries-11/delete-identical-rows/using-row-number.sql
@@ -0,0 +1,15 @@
+-- PostgreSQL / MS SQL Server / MySQL 8.0+
+-- Numbers the rows of each (Value1, Value2, Value3) group by ascending
+-- RecordID; every row numbered above 1 is a duplicate and is deleted.
+WITH CTE_DuplicateRecords AS (
+    SELECT
+        RecordID,
+        ROW_NUMBER() OVER (
+            PARTITION BY Value1, Value2, Value3
+            ORDER BY RecordID
+        ) AS rn
+    FROM
+        DuplicateRecords
+)
+DELETE FROM DuplicateRecords
+WHERE RecordID IN (SELECT RecordID FROM CTE_DuplicateRecords WHERE rn > 1);